# Main dataset: "Unreported" and "NA" are both read in as missing values
d_import <- read_csv(
  file = "data/final_dataset.csv",
  na = c("Unreported", "NA"),
  guess_max = 3000
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_character(),
## start_date = col_datetime(format = ""),
## sample_size = col_double(),
## Day_inferred = col_logical(),
## Date_registration_format = col_datetime(format = "")
## )
## ℹ Use `spec()` for the full column specifications.
# Manually extracted 15% sample, needed for calculating contradictions
d_man <- read_xlsx(
  path = "data/manual_processing/manual_extraction/Manual_extraction_all.xlsx"
)
## New names:
## * `` -> ...36
# Build the analysis dataset: keep the trial identifier plus the variables
# from study_arm through analyst_blind, and turn every character column into
# a factor.
# There were convergence issues with source registry, so only its 6 most
# frequent levels are kept and the infrequent ones are lumped together.
d <- d_import %>%
  select(TrialID, study_arm:analyst_blind) %>%
  # across() replaces the superseded mutate_if()
  mutate(across(where(is.character), as.factor)) %>%
  # refer to the column directly rather than reaching back into `d$...`
  mutate(source_registry = fct_lump_n(source_registry, n = 6))
# Datasets for use in analysis 10: the COVID arm together with one comparator
# arm each, plus a logical `covid` column that is TRUE for COVID-arm rows.
main_dataset_2 <- d[d$study_arm %in% c("covid", "main"), ]
main_dataset_2[["covid"]] <- main_dataset_2[["study_arm"]] == "covid"
indication_dataset_2 <- d[d$study_arm %in% c("covid", "im"), ]
indication_dataset_2[["covid"]] <- indication_dataset_2[["study_arm"]] == "covid"
Outlier check
# Histogram of the imported (untransformed) sample sizes, used to spot outliers
hist(d_import$sample_size)

A manual check of some of the largest sample sizes indicated that they are accurate.
# label variables for table1
# Column name -> display label; applied in the order listed.
variable_labels <- c(
  control_arm = "Control arm",
  randomisation = "Randomisation",
  blinding = "Blinding",
  prospective = "Prospective registration",
  source_registry = "Source registry",
  phase_clean = "Phase",
  region_Africa = "Africa",
  region_N_America = "North America",
  region_L_America = "Latin America",
  region_Asia = "Asia",
  region_Europe = "Europe",
  region_Oceania = "Oceania",
  multicentre = "Multicentre",
  primary_purpose = "Primary purpose",
  sponsor_type = "Sponsor type",
  sample_size = "Sample size",
  vaccine = "Vaccine",
  conventional = "Conventional",
  traditional = "Traditional",
  subject_blind = "Subject blind",
  caregiver_blind = "Caregiver blind",
  investigator_blind = "Investigator blind",
  outcome_blind = "Outcome blind",
  analyst_blind = "Analyst blind"
)
for (variable in names(variable_labels)) {
  # `[[` extracts the bare (unnamed) label string
  label(d[[variable]]) <- variable_labels[[variable]]
}
# Keep only manually extracted records that are not flagged Exclude == "Yes".
# NOTE(review): filter() also drops rows where Exclude is missing — confirm
# that this is intended.
d_man <- filter(d_man, Exclude != "Yes")
The following was used for quality control criterion #3.
# Contradiction counts for control arm (a missing category is shown if present)
table(d_man[["ContradictionControlArm"]], useNA = "ifany")
##
## No
## 372
# Contradiction counts for randomisation (a missing category is shown if present)
table(d_man[["ContradictionRandomisation"]], useNA = "ifany")
##
## No Yes
## 369 3
# Contradiction counts for blinding (a missing category is shown if present)
table(d_man[["ContradictionBlinding"]], useNA = "ifany")
##
## No Yes
## 364 8
# Contradiction counts for prospective registration (a missing category is shown if present)
table(d_man[["ContradictionProspectiveRegistration"]], useNA = "ifany")
##
## No Yes
## 361 11
# Contradictions for control arm, as percentages
100 * prop.table(table(d_man[["ContradictionControlArm"]], useNA = "ifany"))
##
## No
## 100
# Contradictions for randomisation, as percentages
100 * prop.table(table(d_man[["ContradictionRandomisation"]], useNA = "ifany"))
##
## No Yes
## 99.1935484 0.8064516
# Contradictions for blinding, as percentages
100 * prop.table(table(d_man[["ContradictionBlinding"]], useNA = "ifany"))
##
## No Yes
## 97.849462 2.150538
# Contradictions for prospective registration, as percentages
100 * prop.table(
  table(d_man[["ContradictionProspectiveRegistration"]], useNA = "ifany")
)
##
## No Yes
## 97.043011 2.956989
Descriptive
This is what the data look like before changing any of the variables as per the protocol (e.g. before changing randomisation = Not applicable to randomisation = No). This is Table 4 in the paper.
# make col headings nicer: replace the study arm codes with display names
d_tab <- mutate(
  d,
  study_arm = case_when(
    study_arm == "covid" ~ "Covid",
    study_arm == "im" ~ "Indication matched",
    study_arm == "main" ~ "Main"
  )
)
# group together infrequent source registries for a nicer table
# d_tab <- d_tab %>%
# mutate(source_registry = fct_lump_n(d_tab$source_registry, n = 6))
# Descriptive table by study arm with an extra "All" column; continuous
# variables are rendered as mean (SD) and median [Q1, Q3].
table1(
  ~ control_arm +
    randomisation +
    blinding +
    prospective +
    source_registry +
    phase_clean +
    region_Africa +
    region_N_America +
    region_L_America +
    region_Asia +
    region_Europe +
    region_Oceania +
    multicentre +
    primary_purpose +
    sponsor_type +
    sample_size +
    vaccine +
    conventional +
    traditional +
    subject_blind +
    caregiver_blind +
    investigator_blind +
    outcome_blind +
    analyst_blind | study_arm,
  data = d_tab,
  overall = "All",
  render.continuous = c(. = "Mean (SD)", . = "Median [Q1, Q3]")
)
|
Covid (N=818) |
Indication matched (N=839) |
Main (N=844) |
All (N=2501) |
| Control arm |
|
|
|
|
| No |
105 (12.8%) |
98 (11.7%) |
211 (25.0%) |
414 (16.6%) |
| Yes |
713 (87.2%) |
740 (88.2%) |
632 (74.9%) |
2085 (83.4%) |
| Missing |
0 (0%) |
1 (0.1%) |
1 (0.1%) |
2 (0.1%) |
| Randomisation |
|
|
|
|
| No |
41 (5.0%) |
45 (5.4%) |
41 (4.9%) |
127 (5.1%) |
| Not applicable |
105 (12.8%) |
98 (11.7%) |
211 (25.0%) |
414 (16.6%) |
| Yes |
669 (81.8%) |
693 (82.6%) |
587 (69.5%) |
1949 (77.9%) |
| Missing |
3 (0.4%) |
3 (0.4%) |
5 (0.6%) |
11 (0.4%) |
| Blinding |
|
|
|
|
| No |
424 (51.8%) |
296 (35.3%) |
433 (51.3%) |
1153 (46.1%) |
| Yes |
366 (44.7%) |
515 (61.4%) |
387 (45.9%) |
1268 (50.7%) |
| Missing |
28 (3.4%) |
28 (3.3%) |
24 (2.8%) |
80 (3.2%) |
| Prospective registration |
|
|
|
|
| No |
259 (31.7%) |
281 (33.5%) |
222 (26.3%) |
762 (30.5%) |
| Yes |
559 (68.3%) |
558 (66.5%) |
622 (73.7%) |
1739 (69.5%) |
| Source registry |
|
|
|
|
| ChiCTR |
57 (7.0%) |
42 (5.0%) |
60 (7.1%) |
159 (6.4%) |
| CT.gov |
417 (51.0%) |
455 (54.2%) |
352 (41.7%) |
1224 (48.9%) |
| CTRI |
72 (8.8%) |
27 (3.2%) |
63 (7.5%) |
162 (6.5%) |
| EUCTR |
104 (12.7%) |
145 (17.3%) |
182 (21.6%) |
431 (17.2%) |
| IRCT |
109 (13.3%) |
60 (7.2%) |
58 (6.9%) |
227 (9.1%) |
| JPRN |
10 (1.2%) |
45 (5.4%) |
67 (7.9%) |
122 (4.9%) |
| Other |
49 (6.0%) |
65 (7.7%) |
62 (7.3%) |
176 (7.0%) |
| Phase |
|
|
|
|
| Phase 1 |
85 (10.4%) |
86 (10.3%) |
109 (12.9%) |
280 (11.2%) |
| Phase 2 |
293 (35.8%) |
181 (21.6%) |
250 (29.6%) |
724 (28.9%) |
| Phase 3 |
280 (34.2%) |
253 (30.2%) |
235 (27.8%) |
768 (30.7%) |
| Phase 4 |
61 (7.5%) |
186 (22.2%) |
118 (14.0%) |
365 (14.6%) |
| Undefined |
93 (11.4%) |
111 (13.2%) |
113 (13.4%) |
317 (12.7%) |
| Missing |
6 (0.7%) |
22 (2.6%) |
19 (2.3%) |
47 (1.9%) |
| Africa |
|
|
|
|
| No |
770 (94.1%) |
771 (91.9%) |
791 (93.7%) |
2332 (93.2%) |
| Yes |
48 (5.9%) |
68 (8.1%) |
53 (6.3%) |
169 (6.8%) |
| North America |
|
|
|
|
| No |
649 (79.3%) |
582 (69.4%) |
569 (67.4%) |
1800 (72.0%) |
| Yes |
169 (20.7%) |
257 (30.6%) |
275 (32.6%) |
701 (28.0%) |
| Latin America |
|
|
|
|
| No |
727 (88.9%) |
760 (90.6%) |
750 (88.9%) |
2237 (89.4%) |
| Yes |
91 (11.1%) |
79 (9.4%) |
94 (11.1%) |
264 (10.6%) |
| Asia |
|
|
|
|
| No |
458 (56.0%) |
469 (55.9%) |
360 (42.7%) |
1287 (51.5%) |
| Yes |
360 (44.0%) |
370 (44.1%) |
484 (57.3%) |
1214 (48.5%) |
| Europe |
|
|
|
|
| No |
596 (72.9%) |
542 (64.6%) |
540 (64.0%) |
1678 (67.1%) |
| Yes |
222 (27.1%) |
297 (35.4%) |
304 (36.0%) |
823 (32.9%) |
| Oceania |
|
|
|
|
| No |
802 (98.0%) |
778 (92.7%) |
759 (89.9%) |
2339 (93.5%) |
| Yes |
16 (2.0%) |
61 (7.3%) |
85 (10.1%) |
162 (6.5%) |
| Multicentre |
|
|
|
|
| No |
401 (49.0%) |
411 (49.0%) |
401 (47.5%) |
1213 (48.5%) |
| Yes |
373 (45.6%) |
377 (44.9%) |
384 (45.5%) |
1134 (45.3%) |
| Missing |
44 (5.4%) |
51 (6.1%) |
59 (7.0%) |
154 (6.2%) |
| Primary purpose |
|
|
|
|
| Other |
26 (3.2%) |
101 (12.0%) |
111 (13.2%) |
238 (9.5%) |
| Prevention |
121 (14.8%) |
268 (31.9%) |
88 (10.4%) |
477 (19.1%) |
| Treatment |
671 (82.0%) |
470 (56.0%) |
645 (76.4%) |
1786 (71.4%) |
| Sponsor type |
|
|
|
|
| Industry |
182 (22.2%) |
261 (31.1%) |
326 (38.6%) |
769 (30.7%) |
| Investigator |
52 (6.4%) |
35 (4.2%) |
42 (5.0%) |
129 (5.2%) |
| Non industry |
573 (70.0%) |
536 (63.9%) |
433 (51.3%) |
1542 (61.7%) |
| Missing |
11 (1.3%) |
7 (0.8%) |
43 (5.1%) |
61 (2.4%) |
| Sample size |
|
|
|
|
| Mean (SD) |
893 (5970) |
723 (4820) |
397 (4890) |
668 (5250) |
| Median [Q1, Q3] |
100 [50.0, 277] |
120 [45.0, 308] |
80.0 [40.0, 199] |
100 [42.0, 260] |
| Missing |
2 (0.2%) |
2 (0.2%) |
0 (0%) |
4 (0.2%) |
| Vaccine |
|
|
|
|
| No |
771 (94.3%) |
595 (70.9%) |
816 (96.7%) |
2182 (87.2%) |
| Yes |
47 (5.7%) |
244 (29.1%) |
28 (3.3%) |
319 (12.8%) |
| Conventional |
|
|
|
|
| No |
134 (16.4%) |
273 (32.5%) |
76 (9.0%) |
483 (19.3%) |
| Yes |
684 (83.6%) |
566 (67.5%) |
768 (91.0%) |
2018 (80.7%) |
| Traditional |
|
|
|
|
| No |
713 (87.2%) |
776 (92.5%) |
764 (90.5%) |
2253 (90.1%) |
| Yes |
105 (12.8%) |
63 (7.5%) |
80 (9.5%) |
248 (9.9%) |
| Subject blind |
|
|
|
|
| No |
424 (51.8%) |
318 (37.9%) |
416 (49.3%) |
1158 (46.3%) |
| Yes |
215 (26.3%) |
268 (31.9%) |
175 (20.7%) |
658 (26.3%) |
| Missing |
179 (21.9%) |
253 (30.2%) |
253 (30.0%) |
685 (27.4%) |
| Caregiver blind |
|
|
|
|
| No |
508 (62.1%) |
420 (50.1%) |
500 (59.2%) |
1428 (57.1%) |
| Yes |
131 (16.0%) |
166 (19.8%) |
87 (10.3%) |
384 (15.4%) |
| Missing |
179 (21.9%) |
253 (30.2%) |
257 (30.5%) |
689 (27.5%) |
| Investigator blind |
|
|
|
|
| No |
460 (56.2%) |
330 (39.3%) |
458 (54.3%) |
1248 (49.9%) |
| Yes |
179 (21.9%) |
256 (30.5%) |
133 (15.8%) |
568 (22.7%) |
| Missing |
179 (21.9%) |
253 (30.2%) |
253 (30.0%) |
685 (27.4%) |
| Outcome blind |
|
|
|
|
| No |
517 (63.2%) |
401 (47.8%) |
485 (57.5%) |
1403 (56.1%) |
| Yes |
122 (14.9%) |
189 (22.5%) |
103 (12.2%) |
414 (16.6%) |
| Missing |
179 (21.9%) |
249 (29.7%) |
256 (30.3%) |
684 (27.3%) |
| Analyst blind |
|
|
|
|
| No |
637 (77.9%) |
583 (69.5%) |
587 (69.5%) |
1807 (72.3%) |
| Yes |
2 (0.2%) |
3 (0.4%) |
0 (0%) |
5 (0.2%) |
| Missing |
179 (21.9%) |
253 (30.2%) |
257 (30.5%) |
689 (27.5%) |
We specified that we would treat unknown/unreported values in particular ways. We implement this here to generate the dataset for analysis.
# Randomisation recorded as "Not applicable" is analysed as "No"
d$randomisation[d$randomisation %in% "Not applicable"] <- "No"
# Missing values are analysed as "No" for the variables below.
# Including control_arm here is a deviation agreed with the editor.
for (variable in c("randomisation", "blinding", "multicentre", "control_arm")) {
  d[[variable]][is.na(d[[variable]])] <- "No"
}
# d$sample_size <- log(d$sample_size)
# to impute: phase, sample size (for only 3 trials) and sponsor type
Check log sample size
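Below we check the linearity-in-the-logit assumption, as per the preregistration, by plotting the empirical log odds of each outcome within sample-size quintiles against the quintile median of the raw and of the log-transformed sample size. Log-transformed sample size generally appeared to give the better (more linear) fit.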
# check linearity in logit
# Work on a copy of `d` restricted to trials with a known sample size.
# Logical indexing is used instead of `-which(is.na(...))`, because a negative
# index built from an empty which() selects zero rows, i.e. it would silently
# empty the data whenever no sample size is missing.
d_check <- d[!is.na(d$sample_size), ]
# Quintile cut points of sample size
quantile(d_check$sample_size, probs = seq(0, 1, 1/5), na.rm = TRUE)
## 0% 20% 40% 60% 80% 100%
## 1.0 38.0 70.0 133.0 342.8 140000.0
# Assign each trial to a sample-size quintile, labelled "1" (smallest) to
# "5" (largest). The cut points are computed from the data instead of being
# typed in by hand, so they cannot disagree with the quantile() output (the
# hand-typed upper cut point of 342.6 did not match the 80% quantile of 342.8).
# Intervals are closed on the right, as in the original comparisons, and the
# minimum is included in the first group.
quintile_breaks <- quantile(
  d_check$sample_size,
  probs = seq(0, 1, 1/5),
  na.rm = TRUE
)
d_check <- d_check %>%
  mutate(
    quintile_group = as.character(
      cut(
        sample_size,
        breaks = quintile_breaks,
        labels = 1:5,
        include.lowest = TRUE
      )
    )
  )
# Control arm counts in the linearity-check data
table(d_check[["control_arm"]])
##
## No Yes
## 413 2084
# Convert the four Yes/No outcomes to numeric 1/0 so that proportions can be
# computed. A single vectorised lookup replaces the four copy-pasted stanzas
# of row-subset assignments; any value other than "Yes"/"No" (including a
# missing value) becomes NA instead of stopping with an error.
binary_codes <- c(No = 0, Yes = 1)
for (outcome in c("control_arm", "randomisation", "blinding", "prospective")) {
  # unname() drops the "Yes"/"No" names carried over from the lookup vector
  d_check[[outcome]] <- unname(binary_codes[as.character(d_check[[outcome]])])
}
# Per-quintile summary: empirical log odds of each outcome plus the median raw
# and median log sample size. `prop` is a scratch column that is overwritten
# before each log-odds calculation, so the `prop` left in `s` is the
# proportion for `prospective`.
log_odds_of <- function(p) log(p / (1 - p))
s <- summarise(
  group_by(d_check, quintile_group),
  prop = sum(control_arm) / n(),
  log_odds_control = log_odds_of(prop),
  prop = sum(randomisation) / n(),
  log_odds_randomisation = log_odds_of(prop),
  prop = sum(blinding) / n(),
  log_odds_blinding = log_odds_of(prop),
  prop = sum(prospective) / n(),
  log_odds_prospective = log_odds_of(prop),
  median = median(sample_size),
  median_log = median(log(sample_size))
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Log odds of each outcome against the median raw sample size of each quintile
plot(s$median, s$log_odds_control)

plot(s$median, s$log_odds_randomisation)

plot(s$median, s$log_odds_blinding)

plot(s$median, s$log_odds_prospective)

# The same four plots against the median log sample size of each quintile
plot(s$median_log, s$log_odds_control)

plot(s$median_log, s$log_odds_randomisation)

plot(s$median_log, s$log_odds_blinding)

plot(s$median_log, s$log_odds_prospective)

# Log sample size seemed the better fit in the checks above, so the analysis
# dataset uses the natural log of sample size from here on.
d <- mutate(d, sample_size = log(sample_size))
Summary of data
# Column-by-column summary of the analysis dataset (sample size is on the log scale here)
summary(d)
## TrialID study_arm start_date
## ChiCTR1900024129 : 2 covid:818 Min. :1999-12-31 00:00:00
## JPRN-jRCTs011180015: 2 im :839 1st Qu.:2018-08-27 00:00:00
## NCT03814720 : 2 main :844 Median :2019-08-21 12:00:00
## NCT03982069 : 2 Mean :2019-02-24 02:55:12
## NCT04157998 : 2 3rd Qu.:2020-04-27 00:00:00
## NCT04161339 : 2 Max. :2021-08-01 00:00:00
## (Other) :2489 NA's :93
## control_arm randomisation blinding prospective source_registry
## No : 416 No : 552 No :1233 No : 762 ChiCTR: 159
## Yes:2085 Not applicable: 0 Yes:1268 Yes:1739 CT.gov:1224
## Yes :1949 CTRI : 162
## EUCTR : 431
## IRCT : 227
## JPRN : 122
## Other : 176
## phase_clean region_Africa region_N_America region_L_America region_Asia
## Phase 1 :280 No :2332 No :1800 No :2237 No :1287
## Phase 2 :724 Yes: 169 Yes: 701 Yes: 264 Yes:1214
## Phase 3 :768
## Phase 4 :365
## Undefined:317
## NA's : 47
##
## region_Europe region_Oceania multicentre primary_purpose sponsor_type
## No :1678 No :2339 No :1367 Other : 238 Industry : 769
## Yes: 823 Yes: 162 Yes:1134 Prevention: 477 Investigator: 129
## Treatment :1786 Non industry:1542
## NA's : 61
##
##
##
## sample_size vaccine conventional traditional subject_blind
## Min. : 0.000 No :2182 No : 483 No :2253 No :1158
## 1st Qu.: 3.738 Yes: 319 Yes:2018 Yes: 248 Yes : 658
## Median : 4.605 NA's: 685
## Mean : 4.752
## 3rd Qu.: 5.561
## Max. :11.849
## NA's :4
## caregiver_blind investigator_blind outcome_blind analyst_blind
## No :1428 No :1248 No :1403 No :1807
## Yes : 384 Yes : 568 Yes : 414 Yes : 5
## NA's: 689 NA's: 685 NA's: 684 NA's: 689
##
##
##
##
Summary of missing data
# mice::md.pattern(d)
# Plot the proportion of missing values per variable and the missingness
# patterns. The call previously passed `labels = names(data)`. No object
# called `data` is defined in the visible part of this script, so that looked
# up the base function `data()` and evaluated to NULL. NULL is now passed
# explicitly so the result no longer depends on that accidental lookup.
# NOTE(review): `names(d)` was presumably intended. Before switching, confirm
# how aggr() orders user-supplied labels when sortVars = TRUE.
VIM::aggr(
  d,
  numbers = TRUE,
  sortVars = TRUE,
  labels = NULL,
  cex.axis = 0.7,
  gap = 3,
  ylab = c("Proportion of missingness", "Missingness Pattern"),
  oma = c(10, 5, 5, 3)
)

##
## Variables sorted by number of missings:
## Variable Count
## caregiver_blind 0.27548980
## analyst_blind 0.27548980
## subject_blind 0.27389044
## investigator_blind 0.27389044
## outcome_blind 0.27349060
## start_date 0.03718513
## sponsor_type 0.02439024
## phase_clean 0.01879248
## sample_size 0.00159936
## TrialID 0.00000000
## study_arm 0.00000000
## control_arm 0.00000000
## randomisation 0.00000000
## blinding 0.00000000
## prospective 0.00000000
## source_registry 0.00000000
## region_Africa 0.00000000
## region_N_America 0.00000000
## region_L_America 0.00000000
## region_Asia 0.00000000
## region_Europe 0.00000000
## region_Oceania 0.00000000
## multicentre 0.00000000
## primary_purpose 0.00000000
## vaccine 0.00000000
## conventional 0.00000000
## traditional 0.00000000
# VIM::marginplot(d[,c(1, 2)])
Power analysis
We calculated power as follows. This code is retained here as some of the values are used later in the script.
# Inputs for the power analysis
total_alpha <- 0.05
n_comparisons <- 4
# per-comparison alpha: the total alpha divided by the number of comparisons
alpha <- total_alpha / n_comparisons
# standard normal quantile at 1 - alpha / 2
z <- qnorm(1 - alpha / 2)
power <- 0.95
p1 <- 0.5  # probability of outcome in COVID group
p2 <- 0.6  # probability of outcome in comparator group
prop <- 0.5  # proportion of sample in comparator group
# Power calculation: required sample size for the inputs above
sample_size <- SSizeLogisticBin(
  p1 = p1,
  p2 = p2,
  B = prop,
  alpha = alpha,
  power = power
)
The required sample size with power = 95% and an overall alpha of 0.05 split across 4 comparisons (0.0125 per comparison), to detect a 10 percentage point difference between groups (0.5 probability of the outcome in the COVID-19 group versus 0.6 in the comparator group), is 1693.
Analysis
Some more summaries
# Number of trials per study arm (a missing category is shown if present)
table(d[["study_arm"]], useNA = "ifany")
##
## covid im main
## 818 839 844
Generate main and indication-matched datasets:
# COVID arm plus one comparator arm each, with a logical `covid` column that
# is TRUE for COVID-arm rows.
main_dataset <- d[d$study_arm %in% c("covid", "main"), ]
main_dataset[["covid"]] <- main_dataset[["study_arm"]] == "covid"
indication_dataset <- d[d$study_arm %in% c("covid", "im"), ]
indication_dataset[["covid"]] <- indication_dataset[["study_arm"]] == "covid"
Tables of each outcome by other variables
Note that sample size is log-transformed in these tables.
Control arm
# All variables, stratified by study arm and, within arm, by control arm
table1::table1(
  ~ control_arm +
    randomisation +
    blinding +
    prospective +
    source_registry +
    phase_clean +
    region_Africa +
    region_N_America +
    region_L_America +
    region_Asia +
    region_Europe +
    region_Oceania +
    multicentre +
    primary_purpose +
    sponsor_type +
    sample_size +
    vaccine +
    conventional +
    traditional +
    subject_blind +
    caregiver_blind +
    investigator_blind +
    outcome_blind +
    analyst_blind | study_arm * control_arm,
  data = d
)
|
covid |
im |
main |
Overall |
|
No (N=105) |
Yes (N=713) |
No (N=99) |
Yes (N=740) |
No (N=212) |
Yes (N=632) |
No (N=416) |
Yes (N=2085) |
| Control arm |
|
|
|
|
|
|
|
|
| No |
105 (100%) |
0 (0%) |
99 (100%) |
0 (0%) |
212 (100%) |
0 (0%) |
416 (100%) |
0 (0%) |
| Yes |
0 (0%) |
713 (100%) |
0 (0%) |
740 (100%) |
0 (0%) |
632 (100%) |
0 (0%) |
2085 (100%) |
| Randomisation |
|
|
|
|
|
|
|
|
| No |
105 (100%) |
44 (6.2%) |
99 (100%) |
47 (6.4%) |
212 (100%) |
45 (7.1%) |
416 (100%) |
136 (6.5%) |
| Not applicable |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
| Yes |
0 (0%) |
669 (93.8%) |
0 (0%) |
693 (93.6%) |
0 (0%) |
587 (92.9%) |
0 (0%) |
1949 (93.5%) |
| Blinding |
|
|
|
|
|
|
|
|
| No |
105 (100%) |
347 (48.7%) |
98 (99.0%) |
226 (30.5%) |
212 (100%) |
245 (38.8%) |
415 (99.8%) |
818 (39.2%) |
| Yes |
0 (0%) |
366 (51.3%) |
1 (1.0%) |
514 (69.5%) |
0 (0%) |
387 (61.2%) |
1 (0.2%) |
1267 (60.8%) |
| Prospective registration |
|
|
|
|
|
|
|
|
| No |
38 (36.2%) |
221 (31.0%) |
37 (37.4%) |
244 (33.0%) |
58 (27.4%) |
164 (25.9%) |
133 (32.0%) |
629 (30.2%) |
| Yes |
67 (63.8%) |
492 (69.0%) |
62 (62.6%) |
496 (67.0%) |
154 (72.6%) |
468 (74.1%) |
283 (68.0%) |
1456 (69.8%) |
| Source registry |
|
|
|
|
|
|
|
|
| ChiCTR |
5 (4.8%) |
52 (7.3%) |
2 (2.0%) |
40 (5.4%) |
2 (0.9%) |
58 (9.2%) |
9 (2.2%) |
150 (7.2%) |
| CT.gov |
55 (52.4%) |
362 (50.8%) |
49 (49.5%) |
406 (54.9%) |
95 (44.8%) |
257 (40.7%) |
199 (47.8%) |
1025 (49.2%) |
| CTRI |
12 (11.4%) |
60 (8.4%) |
0 (0%) |
27 (3.6%) |
12 (5.7%) |
51 (8.1%) |
24 (5.8%) |
138 (6.6%) |
| EUCTR |
11 (10.5%) |
93 (13.0%) |
20 (20.2%) |
125 (16.9%) |
57 (26.9%) |
125 (19.8%) |
88 (21.2%) |
343 (16.5%) |
| IRCT |
10 (9.5%) |
99 (13.9%) |
1 (1.0%) |
59 (8.0%) |
3 (1.4%) |
55 (8.7%) |
14 (3.4%) |
213 (10.2%) |
| JPRN |
4 (3.8%) |
6 (0.8%) |
20 (20.2%) |
25 (3.4%) |
27 (12.7%) |
40 (6.3%) |
51 (12.3%) |
71 (3.4%) |
| Other |
8 (7.6%) |
41 (5.8%) |
7 (7.1%) |
58 (7.8%) |
16 (7.5%) |
46 (7.3%) |
31 (7.5%) |
145 (7.0%) |
| Phase |
|
|
|
|
|
|
|
|
| Phase 1 |
23 (21.9%) |
62 (8.7%) |
18 (18.2%) |
68 (9.2%) |
28 (13.2%) |
81 (12.8%) |
69 (16.6%) |
211 (10.1%) |
| Phase 2 |
35 (33.3%) |
258 (36.2%) |
15 (15.2%) |
166 (22.4%) |
115 (54.2%) |
135 (21.4%) |
165 (39.7%) |
559 (26.8%) |
| Phase 3 |
16 (15.2%) |
264 (37.0%) |
13 (13.1%) |
240 (32.4%) |
24 (11.3%) |
211 (33.4%) |
53 (12.7%) |
715 (34.3%) |
| Phase 4 |
3 (2.9%) |
58 (8.1%) |
31 (31.3%) |
155 (20.9%) |
21 (9.9%) |
97 (15.3%) |
55 (13.2%) |
310 (14.9%) |
| Undefined |
27 (25.7%) |
66 (9.3%) |
12 (12.1%) |
99 (13.4%) |
13 (6.1%) |
100 (15.8%) |
52 (12.5%) |
265 (12.7%) |
| Missing |
1 (1.0%) |
5 (0.7%) |
10 (10.1%) |
12 (1.6%) |
11 (5.2%) |
8 (1.3%) |
22 (5.3%) |
25 (1.2%) |
| Africa |
|
|
|
|
|
|
|
|
| No |
103 (98.1%) |
667 (93.5%) |
95 (96.0%) |
676 (91.4%) |
206 (97.2%) |
585 (92.6%) |
404 (97.1%) |
1928 (92.5%) |
| Yes |
2 (1.9%) |
46 (6.5%) |
4 (4.0%) |
64 (8.6%) |
6 (2.8%) |
47 (7.4%) |
12 (2.9%) |
157 (7.5%) |
| North America |
|
|
|
|
|
|
|
|
| No |
78 (74.3%) |
571 (80.1%) |
71 (71.7%) |
511 (69.1%) |
134 (63.2%) |
435 (68.8%) |
283 (68.0%) |
1517 (72.8%) |
| Yes |
27 (25.7%) |
142 (19.9%) |
28 (28.3%) |
229 (30.9%) |
78 (36.8%) |
197 (31.2%) |
133 (32.0%) |
568 (27.2%) |
| Latin America |
|
|
|
|
|
|
|
|
| No |
93 (88.6%) |
634 (88.9%) |
96 (97.0%) |
664 (89.7%) |
200 (94.3%) |
550 (87.0%) |
389 (93.5%) |
1848 (88.6%) |
| Yes |
12 (11.4%) |
79 (11.1%) |
3 (3.0%) |
76 (10.3%) |
12 (5.7%) |
82 (13.0%) |
27 (6.5%) |
237 (11.4%) |
| Asia |
|
|
|
|
|
|
|
|
| No |
65 (61.9%) |
393 (55.1%) |
53 (53.5%) |
416 (56.2%) |
99 (46.7%) |
261 (41.3%) |
217 (52.2%) |
1070 (51.3%) |
| Yes |
40 (38.1%) |
320 (44.9%) |
46 (46.5%) |
324 (43.8%) |
113 (53.3%) |
371 (58.7%) |
199 (47.8%) |
1015 (48.7%) |
| Europe |
|
|
|
|
|
|
|
|
| No |
79 (75.2%) |
517 (72.5%) |
66 (66.7%) |
476 (64.3%) |
129 (60.8%) |
411 (65.0%) |
274 (65.9%) |
1404 (67.3%) |
| Yes |
26 (24.8%) |
196 (27.5%) |
33 (33.3%) |
264 (35.7%) |
83 (39.2%) |
221 (35.0%) |
142 (34.1%) |
681 (32.7%) |
| Oceania |
|
|
|
|
|
|
|
|
| No |
104 (99.0%) |
698 (97.9%) |
94 (94.9%) |
684 (92.4%) |
192 (90.6%) |
567 (89.7%) |
390 (93.8%) |
1949 (93.5%) |
| Yes |
1 (1.0%) |
15 (2.1%) |
5 (5.1%) |
56 (7.6%) |
20 (9.4%) |
65 (10.3%) |
26 (6.3%) |
136 (6.5%) |
| Multicentre |
|
|
|
|
|
|
|
|
| No |
71 (67.6%) |
374 (52.5%) |
68 (68.7%) |
394 (53.2%) |
102 (48.1%) |
358 (56.6%) |
241 (57.9%) |
1126 (54.0%) |
| Yes |
34 (32.4%) |
339 (47.5%) |
31 (31.3%) |
346 (46.8%) |
110 (51.9%) |
274 (43.4%) |
175 (42.1%) |
959 (46.0%) |
| Primary purpose |
|
|
|
|
|
|
|
|
| Other |
5 (4.8%) |
21 (2.9%) |
28 (28.3%) |
73 (9.9%) |
17 (8.0%) |
94 (14.9%) |
50 (12.0%) |
188 (9.0%) |
| Prevention |
14 (13.3%) |
107 (15.0%) |
27 (27.3%) |
241 (32.6%) |
10 (4.7%) |
78 (12.3%) |
51 (12.3%) |
426 (20.4%) |
| Treatment |
86 (81.9%) |
585 (82.0%) |
44 (44.4%) |
426 (57.6%) |
185 (87.3%) |
460 (72.8%) |
315 (75.7%) |
1471 (70.6%) |
| Sponsor type |
|
|
|
|
|
|
|
|
| Industry |
20 (19.0%) |
162 (22.7%) |
29 (29.3%) |
232 (31.4%) |
89 (42.0%) |
237 (37.5%) |
138 (33.2%) |
631 (30.3%) |
| Investigator |
9 (8.6%) |
43 (6.0%) |
3 (3.0%) |
32 (4.3%) |
12 (5.7%) |
30 (4.7%) |
24 (5.8%) |
105 (5.0%) |
| Non industry |
72 (68.6%) |
501 (70.3%) |
64 (64.6%) |
472 (63.8%) |
94 (44.3%) |
339 (53.6%) |
230 (55.3%) |
1312 (62.9%) |
| Missing |
4 (3.8%) |
7 (1.0%) |
3 (3.0%) |
4 (0.5%) |
17 (8.0%) |
26 (4.1%) |
24 (5.8%) |
37 (1.8%) |
| Sample size |
|
|
|
|
|
|
|
|
| Mean (SD) |
3.88 (1.36) |
4.99 (1.44) |
3.81 (1.35) |
5.05 (1.49) |
3.76 (1.13) |
4.76 (1.21) |
3.80 (1.25) |
4.94 (1.40) |
| Median [Min, Max] |
3.69 [1.79, 11.8] |
4.61 [0, 11.0] |
3.47 [0, 7.60] |
4.86 [0, 11.7] |
3.69 [0.693, 7.58] |
4.61 [0, 11.8] |
3.66 [0, 11.8] |
4.70 [0, 11.8] |
| Missing |
2 (1.9%) |
0 (0%) |
1 (1.0%) |
1 (0.1%) |
0 (0%) |
0 (0%) |
3 (0.7%) |
1 (0.0%) |
| Vaccine |
|
|
|
|
|
|
|
|
| No |
100 (95.2%) |
671 (94.1%) |
64 (64.6%) |
531 (71.8%) |
203 (95.8%) |
613 (97.0%) |
367 (88.2%) |
1815 (87.1%) |
| Yes |
5 (4.8%) |
42 (5.9%) |
35 (35.4%) |
209 (28.2%) |
9 (4.2%) |
19 (3.0%) |
49 (11.8%) |
270 (12.9%) |
| Conventional |
|
|
|
|
|
|
|
|
| No |
14 (13.3%) |
120 (16.8%) |
35 (35.4%) |
238 (32.2%) |
12 (5.7%) |
64 (10.1%) |
61 (14.7%) |
422 (20.2%) |
| Yes |
91 (86.7%) |
593 (83.2%) |
64 (64.6%) |
502 (67.8%) |
200 (94.3%) |
568 (89.9%) |
355 (85.3%) |
1663 (79.8%) |
| Traditional |
|
|
|
|
|
|
|
|
| No |
96 (91.4%) |
617 (86.5%) |
99 (100%) |
677 (91.5%) |
205 (96.7%) |
559 (88.4%) |
400 (96.2%) |
1853 (88.9%) |
| Yes |
9 (8.6%) |
96 (13.5%) |
0 (0%) |
63 (8.5%) |
7 (3.3%) |
73 (11.6%) |
16 (3.8%) |
232 (11.1%) |
| Subject blind |
|
|
|
|
|
|
|
|
| No |
96 (91.4%) |
328 (46.0%) |
80 (80.8%) |
238 (32.2%) |
198 (93.4%) |
218 (34.5%) |
374 (89.9%) |
784 (37.6%) |
| Yes |
1 (1.0%) |
214 (30.0%) |
0 (0%) |
268 (36.2%) |
0 (0%) |
175 (27.7%) |
1 (0.2%) |
657 (31.5%) |
| Missing |
8 (7.6%) |
171 (24.0%) |
19 (19.2%) |
234 (31.6%) |
14 (6.6%) |
239 (37.8%) |
41 (9.9%) |
644 (30.9%) |
| Caregiver blind |
|
|
|
|
|
|
|
|
| No |
96 (91.4%) |
412 (57.8%) |
80 (80.8%) |
340 (45.9%) |
198 (93.4%) |
302 (47.8%) |
374 (89.9%) |
1054 (50.6%) |
| Yes |
1 (1.0%) |
130 (18.2%) |
0 (0%) |
166 (22.4%) |
0 (0%) |
87 (13.8%) |
1 (0.2%) |
383 (18.4%) |
| Missing |
8 (7.6%) |
171 (24.0%) |
19 (19.2%) |
234 (31.6%) |
14 (6.6%) |
243 (38.4%) |
41 (9.9%) |
648 (31.1%) |
| Investigator blind |
|
|
|
|
|
|
|
|
| No |
96 (91.4%) |
364 (51.1%) |
80 (80.8%) |
250 (33.8%) |
198 (93.4%) |
260 (41.1%) |
374 (89.9%) |
874 (41.9%) |
| Yes |
1 (1.0%) |
178 (25.0%) |
0 (0%) |
256 (34.6%) |
0 (0%) |
133 (21.0%) |
1 (0.2%) |
567 (27.2%) |
| Missing |
8 (7.6%) |
171 (24.0%) |
19 (19.2%) |
234 (31.6%) |
14 (6.6%) |
239 (37.8%) |
41 (9.9%) |
644 (30.9%) |
| Outcome blind |
|
|
|
|
|
|
|
|
| No |
96 (91.4%) |
421 (59.0%) |
80 (80.8%) |
321 (43.4%) |
198 (93.4%) |
287 (45.4%) |
374 (89.9%) |
1029 (49.4%) |
| Yes |
1 (1.0%) |
121 (17.0%) |
1 (1.0%) |
188 (25.4%) |
0 (0%) |
103 (16.3%) |
2 (0.5%) |
412 (19.8%) |
| Missing |
8 (7.6%) |
171 (24.0%) |
18 (18.2%) |
231 (31.2%) |
14 (6.6%) |
242 (38.3%) |
40 (9.6%) |
644 (30.9%) |
| Analyst blind |
|
|
|
|
|
|
|
|
| No |
97 (92.4%) |
540 (75.7%) |
80 (80.8%) |
503 (68.0%) |
198 (93.4%) |
389 (61.6%) |
375 (90.1%) |
1432 (68.7%) |
| Yes |
0 (0%) |
2 (0.3%) |
0 (0%) |
3 (0.4%) |
0 (0%) |
0 (0%) |
0 (0%) |
5 (0.2%) |
| Missing |
8 (7.6%) |
171 (24.0%) |
19 (19.2%) |
234 (31.6%) |
14 (6.6%) |
243 (38.4%) |
41 (9.9%) |
648 (31.1%) |
Randomisation
# All variables, stratified by study arm and, within arm, by randomisation
table1::table1(
  ~ control_arm +
    randomisation +
    blinding +
    prospective +
    source_registry +
    phase_clean +
    region_Africa +
    region_N_America +
    region_L_America +
    region_Asia +
    region_Europe +
    region_Oceania +
    multicentre +
    primary_purpose +
    sponsor_type +
    sample_size +
    vaccine +
    conventional +
    traditional +
    subject_blind +
    caregiver_blind +
    investigator_blind +
    outcome_blind +
    analyst_blind | study_arm * randomisation,
  data = d
)
|
covid |
im |
main |
Overall |
|
No (N=149) |
Yes (N=669) |
No (N=146) |
Yes (N=693) |
No (N=257) |
Yes (N=587) |
No (N=552) |
Yes (N=1949) |
| Control arm |
|
|
|
|
|
|
|
|
| No |
105 (70.5%) |
0 (0%) |
99 (67.8%) |
0 (0%) |
212 (82.5%) |
0 (0%) |
416 (75.4%) |
0 (0%) |
| Yes |
44 (29.5%) |
669 (100%) |
47 (32.2%) |
693 (100%) |
45 (17.5%) |
587 (100%) |
136 (24.6%) |
1949 (100%) |
| Randomisation |
|
|
|
|
|
|
|
|
| No |
149 (100%) |
0 (0%) |
146 (100%) |
0 (0%) |
257 (100%) |
0 (0%) |
552 (100%) |
0 (0%) |
| Not applicable |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
| Yes |
0 (0%) |
669 (100%) |
0 (0%) |
693 (100%) |
0 (0%) |
587 (100%) |
0 (0%) |
1949 (100%) |
| Blinding |
|
|
|
|
|
|
|
|
| No |
147 (98.7%) |
305 (45.6%) |
134 (91.8%) |
190 (27.4%) |
254 (98.8%) |
203 (34.6%) |
535 (96.9%) |
698 (35.8%) |
| Yes |
2 (1.3%) |
364 (54.4%) |
12 (8.2%) |
503 (72.6%) |
3 (1.2%) |
384 (65.4%) |
17 (3.1%) |
1251 (64.2%) |
| Prospective registration |
|
|
|
|
|
|
|
|
| No |
57 (38.3%) |
202 (30.2%) |
58 (39.7%) |
223 (32.2%) |
69 (26.8%) |
153 (26.1%) |
184 (33.3%) |
578 (29.7%) |
| Yes |
92 (61.7%) |
467 (69.8%) |
88 (60.3%) |
470 (67.8%) |
188 (73.2%) |
434 (73.9%) |
368 (66.7%) |
1371 (70.3%) |
| Source registry |
|
|
|
|
|
|
|
|
| ChiCTR |
11 (7.4%) |
46 (6.9%) |
5 (3.4%) |
37 (5.3%) |
3 (1.2%) |
57 (9.7%) |
19 (3.4%) |
140 (7.2%) |
| CT.gov |
78 (52.3%) |
339 (50.7%) |
83 (56.8%) |
372 (53.7%) |
122 (47.5%) |
230 (39.2%) |
283 (51.3%) |
941 (48.3%) |
| CTRI |
15 (10.1%) |
57 (8.5%) |
0 (0%) |
27 (3.9%) |
15 (5.8%) |
48 (8.2%) |
30 (5.4%) |
132 (6.8%) |
| EUCTR |
12 (8.1%) |
92 (13.8%) |
21 (14.4%) |
124 (17.9%) |
62 (24.1%) |
120 (20.4%) |
95 (17.2%) |
336 (17.2%) |
| IRCT |
17 (11.4%) |
92 (13.8%) |
6 (4.1%) |
54 (7.8%) |
8 (3.1%) |
50 (8.5%) |
31 (5.6%) |
196 (10.1%) |
| JPRN |
4 (2.7%) |
6 (0.9%) |
22 (15.1%) |
23 (3.3%) |
30 (11.7%) |
37 (6.3%) |
56 (10.1%) |
66 (3.4%) |
| Other |
12 (8.1%) |
37 (5.5%) |
9 (6.2%) |
56 (8.1%) |
17 (6.6%) |
45 (7.7%) |
38 (6.9%) |
138 (7.1%) |
| Phase |
|
|
|
|
|
|
|
|
| Phase 1 |
33 (22.1%) |
52 (7.8%) |
26 (17.8%) |
60 (8.7%) |
36 (14.0%) |
73 (12.4%) |
95 (17.2%) |
185 (9.5%) |
| Phase 2 |
51 (34.2%) |
242 (36.2%) |
19 (13.0%) |
162 (23.4%) |
135 (52.5%) |
115 (19.6%) |
205 (37.1%) |
519 (26.6%) |
| Phase 3 |
23 (15.4%) |
257 (38.4%) |
22 (15.1%) |
231 (33.3%) |
31 (12.1%) |
204 (34.8%) |
76 (13.8%) |
692 (35.5%) |
| Phase 4 |
4 (2.7%) |
57 (8.5%) |
50 (34.2%) |
136 (19.6%) |
23 (8.9%) |
95 (16.2%) |
77 (13.9%) |
288 (14.8%) |
| Undefined |
34 (22.8%) |
59 (8.8%) |
18 (12.3%) |
93 (13.4%) |
19 (7.4%) |
94 (16.0%) |
71 (12.9%) |
246 (12.6%) |
| Missing |
4 (2.7%) |
2 (0.3%) |
11 (7.5%) |
11 (1.6%) |
13 (5.1%) |
6 (1.0%) |
28 (5.1%) |
19 (1.0%) |
| Africa |
|
|
|
|
|
|
|
|
| No |
146 (98.0%) |
624 (93.3%) |
142 (97.3%) |
629 (90.8%) |
247 (96.1%) |
544 (92.7%) |
535 (96.9%) |
1797 (92.2%) |
| Yes |
3 (2.0%) |
45 (6.7%) |
4 (2.7%) |
64 (9.2%) |
10 (3.9%) |
43 (7.3%) |
17 (3.1%) |
152 (7.8%) |
| North America |
|
|
|
|
|
|
|
|
| No |
111 (74.5%) |
538 (80.4%) |
97 (66.4%) |
485 (70.0%) |
160 (62.3%) |
409 (69.7%) |
368 (66.7%) |
1432 (73.5%) |
| Yes |
38 (25.5%) |
131 (19.6%) |
49 (33.6%) |
208 (30.0%) |
97 (37.7%) |
178 (30.3%) |
184 (33.3%) |
517 (26.5%) |
| Latin America |
|
|
|
|
|
|
|
|
| No |
135 (90.6%) |
592 (88.5%) |
142 (97.3%) |
618 (89.2%) |
245 (95.3%) |
505 (86.0%) |
522 (94.6%) |
1715 (88.0%) |
| Yes |
14 (9.4%) |
77 (11.5%) |
4 (2.7%) |
75 (10.8%) |
12 (4.7%) |
82 (14.0%) |
30 (5.4%) |
234 (12.0%) |
| Asia |
|
|
|
|
|
|
|
|
| No |
89 (59.7%) |
369 (55.2%) |
83 (56.8%) |
386 (55.7%) |
125 (48.6%) |
235 (40.0%) |
297 (53.8%) |
990 (50.8%) |
| Yes |
60 (40.3%) |
300 (44.8%) |
63 (43.2%) |
307 (44.3%) |
132 (51.4%) |
352 (60.0%) |
255 (46.2%) |
959 (49.2%) |
| Europe |
|
|
|
|
|
|
|
|
| No |
111 (74.5%) |
485 (72.5%) |
104 (71.2%) |
438 (63.2%) |
158 (61.5%) |
382 (65.1%) |
373 (67.6%) |
1305 (67.0%) |
| Yes |
38 (25.5%) |
184 (27.5%) |
42 (28.8%) |
255 (36.8%) |
99 (38.5%) |
205 (34.9%) |
179 (32.4%) |
644 (33.0%) |
| Oceania |
|
|
|
|
|
|
|
|
| No |
147 (98.7%) |
655 (97.9%) |
140 (95.9%) |
638 (92.1%) |
234 (91.1%) |
525 (89.4%) |
521 (94.4%) |
1818 (93.3%) |
| Yes |
2 (1.3%) |
14 (2.1%) |
6 (4.1%) |
55 (7.9%) |
23 (8.9%) |
62 (10.6%) |
31 (5.6%) |
131 (6.7%) |
| Multicentre |
|
|
|
|
|
|
|
|
| No |
105 (70.5%) |
340 (50.8%) |
104 (71.2%) |
358 (51.7%) |
127 (49.4%) |
333 (56.7%) |
336 (60.9%) |
1031 (52.9%) |
| Yes |
44 (29.5%) |
329 (49.2%) |
42 (28.8%) |
335 (48.3%) |
130 (50.6%) |
254 (43.3%) |
216 (39.1%) |
918 (47.1%) |
| Primary purpose |
|
|
|
|
|
|
|
|
| Other |
8 (5.4%) |
18 (2.7%) |
42 (28.8%) |
59 (8.5%) |
25 (9.7%) |
86 (14.7%) |
75 (13.6%) |
163 (8.4%) |
| Prevention |
26 (17.4%) |
95 (14.2%) |
44 (30.1%) |
224 (32.3%) |
13 (5.1%) |
75 (12.8%) |
83 (15.0%) |
394 (20.2%) |
| Treatment |
115 (77.2%) |
556 (83.1%) |
60 (41.1%) |
410 (59.2%) |
219 (85.2%) |
426 (72.6%) |
394 (71.4%) |
1392 (71.4%) |
| Sponsor type |
|
|
|
|
|
|
|
|
| Industry |
27 (18.1%) |
155 (23.2%) |
37 (25.3%) |
224 (32.3%) |
108 (42.0%) |
218 (37.1%) |
172 (31.2%) |
597 (30.6%) |
| Investigator |
11 (7.4%) |
41 (6.1%) |
4 (2.7%) |
31 (4.5%) |
14 (5.4%) |
28 (4.8%) |
29 (5.3%) |
100 (5.1%) |
| Non industry |
107 (71.8%) |
466 (69.7%) |
102 (69.9%) |
434 (62.6%) |
118 (45.9%) |
315 (53.7%) |
327 (59.2%) |
1215 (62.3%) |
| Missing |
4 (2.7%) |
7 (1.0%) |
3 (2.1%) |
4 (0.6%) |
17 (6.6%) |
26 (4.4%) |
24 (4.3%) |
37 (1.9%) |
| Sample size |
|
|
|
|
|
|
|
|
| Mean (SD) |
3.94 (1.38) |
5.05 (1.42) |
4.00 (1.31) |
5.09 (1.51) |
3.81 (1.15) |
4.81 (1.19) |
3.90 (1.26) |
4.99 (1.39) |
| Median [Min, Max] |
3.69 [0, 11.8] |
4.68 [1.95, 11.0] |
3.69 [0, 7.60] |
4.92 [0, 11.7] |
3.69 [0.693, 8.22] |
4.61 [0, 11.8] |
3.69 [0, 11.8] |
4.79 [0, 11.8] |
| Missing |
2 (1.3%) |
0 (0%) |
1 (0.7%) |
1 (0.1%) |
0 (0%) |
0 (0%) |
3 (0.5%) |
1 (0.1%) |
| Vaccine |
|
|
|
|
|
|
|
|
| No |
138 (92.6%) |
633 (94.6%) |
84 (57.5%) |
511 (73.7%) |
245 (95.3%) |
571 (97.3%) |
467 (84.6%) |
1715 (88.0%) |
| Yes |
11 (7.4%) |
36 (5.4%) |
62 (42.5%) |
182 (26.3%) |
12 (4.7%) |
16 (2.7%) |
85 (15.4%) |
234 (12.0%) |
| Conventional |
|
|
|
|
|
|
|
|
| No |
23 (15.4%) |
111 (16.6%) |
62 (42.5%) |
211 (30.4%) |
16 (6.2%) |
60 (10.2%) |
101 (18.3%) |
382 (19.6%) |
| Yes |
126 (84.6%) |
558 (83.4%) |
84 (57.5%) |
482 (69.6%) |
241 (93.8%) |
527 (89.8%) |
451 (81.7%) |
1567 (80.4%) |
| Traditional |
|
|
|
|
|
|
|
|
| No |
135 (90.6%) |
578 (86.4%) |
146 (100%) |
630 (90.9%) |
247 (96.1%) |
517 (88.1%) |
528 (95.7%) |
1725 (88.5%) |
| Yes |
14 (9.4%) |
91 (13.6%) |
0 (0%) |
63 (9.1%) |
10 (3.9%) |
70 (11.9%) |
24 (4.3%) |
224 (11.5%) |
| Subject blind |
|
|
|
|
|
|
|
|
| No |
133 (89.3%) |
291 (43.5%) |
116 (79.5%) |
202 (29.1%) |
236 (91.8%) |
180 (30.7%) |
485 (87.9%) |
673 (34.5%) |
| Yes |
1 (0.7%) |
214 (32.0%) |
1 (0.7%) |
267 (38.5%) |
0 (0%) |
175 (29.8%) |
2 (0.4%) |
656 (33.7%) |
| Missing |
15 (10.1%) |
164 (24.5%) |
29 (19.9%) |
224 (32.3%) |
21 (8.2%) |
232 (39.5%) |
65 (11.8%) |
620 (31.8%) |
| Caregiver blind |
|
|
|
|
|
|
|
|
| No |
133 (89.3%) |
375 (56.1%) |
117 (80.1%) |
303 (43.7%) |
236 (91.8%) |
264 (45.0%) |
486 (88.0%) |
942 (48.3%) |
| Yes |
1 (0.7%) |
130 (19.4%) |
0 (0%) |
166 (24.0%) |
0 (0%) |
87 (14.8%) |
1 (0.2%) |
383 (19.7%) |
| Missing |
15 (10.1%) |
164 (24.5%) |
29 (19.9%) |
224 (32.3%) |
21 (8.2%) |
236 (40.2%) |
65 (11.8%) |
624 (32.0%) |
| Investigator blind |
|
|
|
|
|
|
|
|
| No |
133 (89.3%) |
327 (48.9%) |
117 (80.1%) |
213 (30.7%) |
236 (91.8%) |
222 (37.8%) |
486 (88.0%) |
762 (39.1%) |
| Yes |
1 (0.7%) |
178 (26.6%) |
0 (0%) |
256 (36.9%) |
0 (0%) |
133 (22.7%) |
1 (0.2%) |
567 (29.1%) |
| Missing |
15 (10.1%) |
164 (24.5%) |
29 (19.9%) |
224 (32.3%) |
21 (8.2%) |
232 (39.5%) |
65 (11.8%) |
620 (31.8%) |
| Outcome blind |
|
|
|
|
|
|
|
|
| No |
132 (88.6%) |
385 (57.5%) |
115 (78.8%) |
286 (41.3%) |
236 (91.8%) |
249 (42.4%) |
483 (87.5%) |
920 (47.2%) |
| Yes |
2 (1.3%) |
120 (17.9%) |
5 (3.4%) |
184 (26.6%) |
0 (0%) |
103 (17.5%) |
7 (1.3%) |
407 (20.9%) |
| Missing |
15 (10.1%) |
164 (24.5%) |
26 (17.8%) |
223 (32.2%) |
21 (8.2%) |
235 (40.0%) |
62 (11.2%) |
622 (31.9%) |
| Analyst blind |
|
|
|
|
|
|
|
|
| No |
134 (89.9%) |
503 (75.2%) |
117 (80.1%) |
466 (67.2%) |
236 (91.8%) |
351 (59.8%) |
487 (88.2%) |
1320 (67.7%) |
| Yes |
0 (0%) |
2 (0.3%) |
0 (0%) |
3 (0.4%) |
0 (0%) |
0 (0%) |
0 (0%) |
5 (0.3%) |
| Missing |
15 (10.1%) |
164 (24.5%) |
29 (19.9%) |
224 (32.3%) |
21 (8.2%) |
236 (40.2%) |
65 (11.8%) |
624 (32.0%) |
Blinding
# Descriptive table of all trial characteristics, with columns split by
# study arm and, within each arm, by blinding status.
table1::table1(
  ~ control_arm + randomisation + blinding + prospective +
    source_registry + phase_clean +
    region_Africa + region_N_America + region_L_America +
    region_Asia + region_Europe + region_Oceania +
    multicentre + primary_purpose + sponsor_type + sample_size +
    vaccine + conventional + traditional +
    subject_blind + caregiver_blind + investigator_blind +
    outcome_blind + analyst_blind |
    study_arm * blinding,
  data = d
)
|
covid |
im |
main |
Overall |
|
No (N=452) |
Yes (N=366) |
No (N=324) |
Yes (N=515) |
No (N=457) |
Yes (N=387) |
No (N=1233) |
Yes (N=1268) |
| Control arm |
|
|
|
|
|
|
|
|
| No |
105 (23.2%) |
0 (0%) |
98 (30.2%) |
1 (0.2%) |
212 (46.4%) |
0 (0%) |
415 (33.7%) |
1 (0.1%) |
| Yes |
347 (76.8%) |
366 (100%) |
226 (69.8%) |
514 (99.8%) |
245 (53.6%) |
387 (100%) |
818 (66.3%) |
1267 (99.9%) |
| Randomisation |
|
|
|
|
|
|
|
|
| No |
147 (32.5%) |
2 (0.5%) |
134 (41.4%) |
12 (2.3%) |
254 (55.6%) |
3 (0.8%) |
535 (43.4%) |
17 (1.3%) |
| Not applicable |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
| Yes |
305 (67.5%) |
364 (99.5%) |
190 (58.6%) |
503 (97.7%) |
203 (44.4%) |
384 (99.2%) |
698 (56.6%) |
1251 (98.7%) |
| Blinding |
|
|
|
|
|
|
|
|
| No |
452 (100%) |
0 (0%) |
324 (100%) |
0 (0%) |
457 (100%) |
0 (0%) |
1233 (100%) |
0 (0%) |
| Yes |
0 (0%) |
366 (100%) |
0 (0%) |
515 (100%) |
0 (0%) |
387 (100%) |
0 (0%) |
1268 (100%) |
| Prospective registration |
|
|
|
|
|
|
|
|
| No |
161 (35.6%) |
98 (26.8%) |
115 (35.5%) |
166 (32.2%) |
124 (27.1%) |
98 (25.3%) |
400 (32.4%) |
362 (28.5%) |
| Yes |
291 (64.4%) |
268 (73.2%) |
209 (64.5%) |
349 (67.8%) |
333 (72.9%) |
289 (74.7%) |
833 (67.6%) |
906 (71.5%) |
| Source registry |
|
|
|
|
|
|
|
|
| ChiCTR |
46 (10.2%) |
11 (3.0%) |
27 (8.3%) |
15 (2.9%) |
35 (7.7%) |
25 (6.5%) |
108 (8.8%) |
51 (4.0%) |
| CT.gov |
205 (45.4%) |
212 (57.9%) |
156 (48.1%) |
299 (58.1%) |
197 (43.1%) |
155 (40.1%) |
558 (45.3%) |
666 (52.5%) |
| CTRI |
54 (11.9%) |
18 (4.9%) |
11 (3.4%) |
16 (3.1%) |
30 (6.6%) |
33 (8.5%) |
95 (7.7%) |
67 (5.3%) |
| EUCTR |
54 (11.9%) |
50 (13.7%) |
51 (15.7%) |
94 (18.3%) |
97 (21.2%) |
85 (22.0%) |
202 (16.4%) |
229 (18.1%) |
| IRCT |
58 (12.8%) |
51 (13.9%) |
21 (6.5%) |
39 (7.6%) |
16 (3.5%) |
42 (10.9%) |
95 (7.7%) |
132 (10.4%) |
| JPRN |
7 (1.5%) |
3 (0.8%) |
30 (9.3%) |
15 (2.9%) |
52 (11.4%) |
15 (3.9%) |
89 (7.2%) |
33 (2.6%) |
| Other |
28 (6.2%) |
21 (5.7%) |
28 (8.6%) |
37 (7.2%) |
30 (6.6%) |
32 (8.3%) |
86 (7.0%) |
90 (7.1%) |
| Phase |
|
|
|
|
|
|
|
|
| Phase 1 |
53 (11.7%) |
32 (8.7%) |
44 (13.6%) |
42 (8.2%) |
69 (15.1%) |
40 (10.3%) |
166 (13.5%) |
114 (9.0%) |
| Phase 2 |
154 (34.1%) |
139 (38.0%) |
41 (12.7%) |
140 (27.2%) |
166 (36.3%) |
84 (21.7%) |
361 (29.3%) |
363 (28.6%) |
| Phase 3 |
134 (29.6%) |
146 (39.9%) |
72 (22.2%) |
181 (35.1%) |
95 (20.8%) |
140 (36.2%) |
301 (24.4%) |
467 (36.8%) |
| Phase 4 |
43 (9.5%) |
18 (4.9%) |
96 (29.6%) |
90 (17.5%) |
67 (14.7%) |
51 (13.2%) |
206 (16.7%) |
159 (12.5%) |
| Undefined |
65 (14.4%) |
28 (7.7%) |
54 (16.7%) |
57 (11.1%) |
42 (9.2%) |
71 (18.3%) |
161 (13.1%) |
156 (12.3%) |
| Missing |
3 (0.7%) |
3 (0.8%) |
17 (5.2%) |
5 (1.0%) |
18 (3.9%) |
1 (0.3%) |
38 (3.1%) |
9 (0.7%) |
| Africa |
|
|
|
|
|
|
|
|
| No |
431 (95.4%) |
339 (92.6%) |
310 (95.7%) |
461 (89.5%) |
436 (95.4%) |
355 (91.7%) |
1177 (95.5%) |
1155 (91.1%) |
| Yes |
21 (4.6%) |
27 (7.4%) |
14 (4.3%) |
54 (10.5%) |
21 (4.6%) |
32 (8.3%) |
56 (4.5%) |
113 (8.9%) |
| North America |
|
|
|
|
|
|
|
|
| No |
378 (83.6%) |
271 (74.0%) |
231 (71.3%) |
351 (68.2%) |
313 (68.5%) |
256 (66.1%) |
922 (74.8%) |
878 (69.2%) |
| Yes |
74 (16.4%) |
95 (26.0%) |
93 (28.7%) |
164 (31.8%) |
144 (31.5%) |
131 (33.9%) |
311 (25.2%) |
390 (30.8%) |
| Latin America |
|
|
|
|
|
|
|
|
| No |
418 (92.5%) |
309 (84.4%) |
308 (95.1%) |
452 (87.8%) |
425 (93.0%) |
325 (84.0%) |
1151 (93.3%) |
1086 (85.6%) |
| Yes |
34 (7.5%) |
57 (15.6%) |
16 (4.9%) |
63 (12.2%) |
32 (7.0%) |
62 (16.0%) |
82 (6.7%) |
182 (14.4%) |
| Asia |
|
|
|
|
|
|
|
|
| No |
235 (52.0%) |
223 (60.9%) |
173 (53.4%) |
296 (57.5%) |
194 (42.5%) |
166 (42.9%) |
602 (48.8%) |
685 (54.0%) |
| Yes |
217 (48.0%) |
143 (39.1%) |
151 (46.6%) |
219 (42.5%) |
263 (57.5%) |
221 (57.1%) |
631 (51.2%) |
583 (46.0%) |
| Europe |
|
|
|
|
|
|
|
|
| No |
334 (73.9%) |
262 (71.6%) |
226 (69.8%) |
316 (61.4%) |
293 (64.1%) |
247 (63.8%) |
853 (69.2%) |
825 (65.1%) |
| Yes |
118 (26.1%) |
104 (28.4%) |
98 (30.2%) |
199 (38.6%) |
164 (35.9%) |
140 (36.2%) |
380 (30.8%) |
443 (34.9%) |
| Oceania |
|
|
|
|
|
|
|
|
| No |
447 (98.9%) |
355 (97.0%) |
308 (95.1%) |
470 (91.3%) |
417 (91.2%) |
342 (88.4%) |
1172 (95.1%) |
1167 (92.0%) |
| Yes |
5 (1.1%) |
11 (3.0%) |
16 (4.9%) |
45 (8.7%) |
40 (8.8%) |
45 (11.6%) |
61 (4.9%) |
101 (8.0%) |
| Multicentre |
|
|
|
|
|
|
|
|
| No |
277 (61.3%) |
168 (45.9%) |
215 (66.4%) |
247 (48.0%) |
248 (54.3%) |
212 (54.8%) |
740 (60.0%) |
627 (49.4%) |
| Yes |
175 (38.7%) |
198 (54.1%) |
109 (33.6%) |
268 (52.0%) |
209 (45.7%) |
175 (45.2%) |
493 (40.0%) |
641 (50.6%) |
| Primary purpose |
|
|
|
|
|
|
|
|
| Other |
19 (4.2%) |
7 (1.9%) |
63 (19.4%) |
38 (7.4%) |
52 (11.4%) |
59 (15.2%) |
134 (10.9%) |
104 (8.2%) |
| Prevention |
47 (10.4%) |
74 (20.2%) |
82 (25.3%) |
186 (36.1%) |
31 (6.8%) |
57 (14.7%) |
160 (13.0%) |
317 (25.0%) |
| Treatment |
386 (85.4%) |
285 (77.9%) |
179 (55.2%) |
291 (56.5%) |
374 (81.8%) |
271 (70.0%) |
939 (76.2%) |
847 (66.8%) |
| Sponsor type |
|
|
|
|
|
|
|
|
| Industry |
65 (14.4%) |
117 (32.0%) |
69 (21.3%) |
192 (37.3%) |
169 (37.0%) |
157 (40.6%) |
303 (24.6%) |
466 (36.8%) |
| Investigator |
29 (6.4%) |
23 (6.3%) |
8 (2.5%) |
27 (5.2%) |
19 (4.2%) |
23 (5.9%) |
56 (4.5%) |
73 (5.8%) |
| Non industry |
350 (77.4%) |
223 (60.9%) |
242 (74.7%) |
294 (57.1%) |
232 (50.8%) |
201 (51.9%) |
824 (66.8%) |
718 (56.6%) |
| Missing |
8 (1.8%) |
3 (0.8%) |
5 (1.5%) |
2 (0.4%) |
37 (8.1%) |
6 (1.6%) |
50 (4.1%) |
11 (0.9%) |
| Sample size |
|
|
|
|
|
|
|
|
| Mean (SD) |
4.57 (1.36) |
5.19 (1.55) |
4.36 (1.43) |
5.24 (1.49) |
4.24 (1.26) |
4.83 (1.20) |
4.39 (1.35) |
5.10 (1.44) |
| Median [Min, Max] |
4.45 [0, 11.8] |
4.85 [1.95, 11.0] |
4.25 [0, 10.8] |
5.06 [0, 11.7] |
4.09 [0.693, 11.8] |
4.61 [0, 9.90] |
4.25 [0, 11.8] |
4.86 [0, 11.7] |
| Missing |
2 (0.4%) |
0 (0%) |
2 (0.6%) |
0 (0%) |
0 (0%) |
0 (0%) |
4 (0.3%) |
0 (0%) |
| Vaccine |
|
|
|
|
|
|
|
|
| No |
437 (96.7%) |
334 (91.3%) |
229 (70.7%) |
366 (71.1%) |
441 (96.5%) |
375 (96.9%) |
1107 (89.8%) |
1075 (84.8%) |
| Yes |
15 (3.3%) |
32 (8.7%) |
95 (29.3%) |
149 (28.9%) |
16 (3.5%) |
12 (3.1%) |
126 (10.2%) |
193 (15.2%) |
| Conventional |
|
|
|
|
|
|
|
|
| No |
69 (15.3%) |
65 (17.8%) |
95 (29.3%) |
178 (34.6%) |
35 (7.7%) |
41 (10.6%) |
199 (16.1%) |
284 (22.4%) |
| Yes |
383 (84.7%) |
301 (82.2%) |
229 (70.7%) |
337 (65.4%) |
422 (92.3%) |
346 (89.4%) |
1034 (83.9%) |
984 (77.6%) |
| Traditional |
|
|
|
|
|
|
|
|
| No |
390 (86.3%) |
323 (88.3%) |
312 (96.3%) |
464 (90.1%) |
420 (91.9%) |
344 (88.9%) |
1122 (91.0%) |
1131 (89.2%) |
| Yes |
62 (13.7%) |
43 (11.7%) |
12 (3.7%) |
51 (9.9%) |
37 (8.1%) |
43 (11.1%) |
111 (9.0%) |
137 (10.8%) |
| Subject blind |
|
|
|
|
|
|
|
|
| No |
408 (90.3%) |
16 (4.4%) |
270 (83.3%) |
48 (9.3%) |
398 (87.1%) |
18 (4.7%) |
1076 (87.3%) |
82 (6.5%) |
| Yes |
1 (0.2%) |
214 (58.5%) |
0 (0%) |
268 (52.0%) |
0 (0%) |
175 (45.2%) |
1 (0.1%) |
657 (51.8%) |
| Missing |
43 (9.5%) |
136 (37.2%) |
54 (16.7%) |
199 (38.6%) |
59 (12.9%) |
194 (50.1%) |
156 (12.7%) |
529 (41.7%) |
| Caregiver blind |
|
|
|
|
|
|
|
|
| No |
408 (90.3%) |
100 (27.3%) |
270 (83.3%) |
150 (29.1%) |
398 (87.1%) |
102 (26.4%) |
1076 (87.3%) |
352 (27.8%) |
| Yes |
1 (0.2%) |
130 (35.5%) |
0 (0%) |
166 (32.2%) |
0 (0%) |
87 (22.5%) |
1 (0.1%) |
383 (30.2%) |
| Missing |
43 (9.5%) |
136 (37.2%) |
54 (16.7%) |
199 (38.6%) |
59 (12.9%) |
198 (51.2%) |
156 (12.7%) |
533 (42.0%) |
| Investigator blind |
|
|
|
|
|
|
|
|
| No |
408 (90.3%) |
52 (14.2%) |
270 (83.3%) |
60 (11.7%) |
398 (87.1%) |
60 (15.5%) |
1076 (87.3%) |
172 (13.6%) |
| Yes |
1 (0.2%) |
178 (48.6%) |
0 (0%) |
256 (49.7%) |
0 (0%) |
133 (34.4%) |
1 (0.1%) |
567 (44.7%) |
| Missing |
43 (9.5%) |
136 (37.2%) |
54 (16.7%) |
199 (38.6%) |
59 (12.9%) |
194 (50.1%) |
156 (12.7%) |
529 (41.7%) |
| Outcome blind |
|
|
|
|
|
|
|
|
| No |
408 (90.3%) |
109 (29.8%) |
270 (83.3%) |
131 (25.4%) |
398 (87.1%) |
87 (22.5%) |
1076 (87.3%) |
327 (25.8%) |
| Yes |
1 (0.2%) |
121 (33.1%) |
0 (0%) |
189 (36.7%) |
0 (0%) |
103 (26.6%) |
1 (0.1%) |
413 (32.6%) |
| Missing |
43 (9.5%) |
136 (37.2%) |
54 (16.7%) |
195 (37.9%) |
59 (12.9%) |
197 (50.9%) |
156 (12.7%) |
528 (41.6%) |
| Analyst blind |
|
|
|
|
|
|
|
|
| No |
409 (90.5%) |
228 (62.3%) |
270 (83.3%) |
313 (60.8%) |
398 (87.1%) |
189 (48.8%) |
1077 (87.3%) |
730 (57.6%) |
| Yes |
0 (0%) |
2 (0.5%) |
0 (0%) |
3 (0.6%) |
0 (0%) |
0 (0%) |
0 (0%) |
5 (0.4%) |
| Missing |
43 (9.5%) |
136 (37.2%) |
54 (16.7%) |
199 (38.6%) |
59 (12.9%) |
198 (51.2%) |
156 (12.7%) |
533 (42.0%) |
Prospective registration
# Descriptive table of all trial characteristics, with columns split by
# study arm and, within each arm, by prospective-registration status.
table1::table1(
  ~ control_arm + randomisation + blinding + prospective +
    source_registry + phase_clean +
    region_Africa + region_N_America + region_L_America +
    region_Asia + region_Europe + region_Oceania +
    multicentre + primary_purpose + sponsor_type + sample_size +
    vaccine + conventional + traditional +
    subject_blind + caregiver_blind + investigator_blind +
    outcome_blind + analyst_blind |
    study_arm * prospective,
  data = d
)
|
covid |
im |
main |
Overall |
|
No (N=259) |
Yes (N=559) |
No (N=281) |
Yes (N=558) |
No (N=222) |
Yes (N=622) |
No (N=762) |
Yes (N=1739) |
| Control arm |
|
|
|
|
|
|
|
|
| No |
38 (14.7%) |
67 (12.0%) |
37 (13.2%) |
62 (11.1%) |
58 (26.1%) |
154 (24.8%) |
133 (17.5%) |
283 (16.3%) |
| Yes |
221 (85.3%) |
492 (88.0%) |
244 (86.8%) |
496 (88.9%) |
164 (73.9%) |
468 (75.2%) |
629 (82.5%) |
1456 (83.7%) |
| Randomisation |
|
|
|
|
|
|
|
|
| No |
57 (22.0%) |
92 (16.5%) |
58 (20.6%) |
88 (15.8%) |
69 (31.1%) |
188 (30.2%) |
184 (24.1%) |
368 (21.2%) |
| Not applicable |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
0 (0%) |
| Yes |
202 (78.0%) |
467 (83.5%) |
223 (79.4%) |
470 (84.2%) |
153 (68.9%) |
434 (69.8%) |
578 (75.9%) |
1371 (78.8%) |
| Blinding |
|
|
|
|
|
|
|
|
| No |
161 (62.2%) |
291 (52.1%) |
115 (40.9%) |
209 (37.5%) |
124 (55.9%) |
333 (53.5%) |
400 (52.5%) |
833 (47.9%) |
| Yes |
98 (37.8%) |
268 (47.9%) |
166 (59.1%) |
349 (62.5%) |
98 (44.1%) |
289 (46.5%) |
362 (47.5%) |
906 (52.1%) |
| Prospective registration |
|
|
|
|
|
|
|
|
| No |
259 (100%) |
0 (0%) |
281 (100%) |
0 (0%) |
222 (100%) |
0 (0%) |
762 (100%) |
0 (0%) |
| Yes |
0 (0%) |
559 (100%) |
0 (0%) |
558 (100%) |
0 (0%) |
622 (100%) |
0 (0%) |
1739 (100%) |
| Source registry |
|
|
|
|
|
|
|
|
| ChiCTR |
33 (12.7%) |
24 (4.3%) |
12 (4.3%) |
30 (5.4%) |
9 (4.1%) |
51 (8.2%) |
54 (7.1%) |
105 (6.0%) |
| CT.gov |
113 (43.6%) |
304 (54.4%) |
153 (54.4%) |
302 (54.1%) |
112 (50.5%) |
240 (38.6%) |
378 (49.6%) |
846 (48.6%) |
| CTRI |
1 (0.4%) |
71 (12.7%) |
6 (2.1%) |
21 (3.8%) |
1 (0.5%) |
62 (10.0%) |
8 (1.0%) |
154 (8.9%) |
| EUCTR |
36 (13.9%) |
68 (12.2%) |
20 (7.1%) |
125 (22.4%) |
13 (5.9%) |
169 (27.2%) |
69 (9.1%) |
362 (20.8%) |
| IRCT |
56 (21.6%) |
53 (9.5%) |
53 (18.9%) |
7 (1.3%) |
36 (16.2%) |
22 (3.5%) |
145 (19.0%) |
82 (4.7%) |
| JPRN |
1 (0.4%) |
9 (1.6%) |
20 (7.1%) |
25 (4.5%) |
31 (14.0%) |
36 (5.8%) |
52 (6.8%) |
70 (4.0%) |
| Other |
19 (7.3%) |
30 (5.4%) |
17 (6.0%) |
48 (8.6%) |
20 (9.0%) |
42 (6.8%) |
56 (7.3%) |
120 (6.9%) |
| Phase |
|
|
|
|
|
|
|
|
| Phase 1 |
28 (10.8%) |
57 (10.2%) |
37 (13.2%) |
49 (8.8%) |
34 (15.3%) |
75 (12.1%) |
99 (13.0%) |
181 (10.4%) |
| Phase 2 |
73 (28.2%) |
220 (39.4%) |
47 (16.7%) |
134 (24.0%) |
58 (26.1%) |
192 (30.9%) |
178 (23.4%) |
546 (31.4%) |
| Phase 3 |
95 (36.7%) |
185 (33.1%) |
81 (28.8%) |
172 (30.8%) |
62 (27.9%) |
173 (27.8%) |
238 (31.2%) |
530 (30.5%) |
| Phase 4 |
26 (10.0%) |
35 (6.3%) |
59 (21.0%) |
127 (22.8%) |
25 (11.3%) |
93 (15.0%) |
110 (14.4%) |
255 (14.7%) |
| Undefined |
36 (13.9%) |
57 (10.2%) |
48 (17.1%) |
63 (11.3%) |
34 (15.3%) |
79 (12.7%) |
118 (15.5%) |
199 (11.4%) |
| Missing |
1 (0.4%) |
5 (0.9%) |
9 (3.2%) |
13 (2.3%) |
9 (4.1%) |
10 (1.6%) |
19 (2.5%) |
28 (1.6%) |
| Africa |
|
|
|
|
|
|
|
|
| No |
251 (96.9%) |
519 (92.8%) |
266 (94.7%) |
505 (90.5%) |
209 (94.1%) |
582 (93.6%) |
726 (95.3%) |
1606 (92.4%) |
| Yes |
8 (3.1%) |
40 (7.2%) |
15 (5.3%) |
53 (9.5%) |
13 (5.9%) |
40 (6.4%) |
36 (4.7%) |
133 (7.6%) |
| North America |
|
|
|
|
|
|
|
|
| No |
227 (87.6%) |
422 (75.5%) |
225 (80.1%) |
357 (64.0%) |
184 (82.9%) |
385 (61.9%) |
636 (83.5%) |
1164 (66.9%) |
| Yes |
32 (12.4%) |
137 (24.5%) |
56 (19.9%) |
201 (36.0%) |
38 (17.1%) |
237 (38.1%) |
126 (16.5%) |
575 (33.1%) |
| Latin America |
|
|
|
|
|
|
|
|
| No |
226 (87.3%) |
501 (89.6%) |
263 (93.6%) |
497 (89.1%) |
207 (93.2%) |
543 (87.3%) |
696 (91.3%) |
1541 (88.6%) |
| Yes |
33 (12.7%) |
58 (10.4%) |
18 (6.4%) |
61 (10.9%) |
15 (6.8%) |
79 (12.7%) |
66 (8.7%) |
198 (11.4%) |
| Asia |
|
|
|
|
|
|
|
|
| No |
130 (50.2%) |
328 (58.7%) |
131 (46.6%) |
338 (60.6%) |
87 (39.2%) |
273 (43.9%) |
348 (45.7%) |
939 (54.0%) |
| Yes |
129 (49.8%) |
231 (41.3%) |
150 (53.4%) |
220 (39.4%) |
135 (60.8%) |
349 (56.1%) |
414 (54.3%) |
800 (46.0%) |
| Europe |
|
|
|
|
|
|
|
|
| No |
189 (73.0%) |
407 (72.8%) |
220 (78.3%) |
322 (57.7%) |
179 (80.6%) |
361 (58.0%) |
588 (77.2%) |
1090 (62.7%) |
| Yes |
70 (27.0%) |
152 (27.2%) |
61 (21.7%) |
236 (42.3%) |
43 (19.4%) |
261 (42.0%) |
174 (22.8%) |
649 (37.3%) |
| Oceania |
|
|
|
|
|
|
|
|
| No |
259 (100%) |
543 (97.1%) |
272 (96.8%) |
506 (90.7%) |
220 (99.1%) |
539 (86.7%) |
751 (98.6%) |
1588 (91.3%) |
| Yes |
0 (0%) |
16 (2.9%) |
9 (3.2%) |
52 (9.3%) |
2 (0.9%) |
83 (13.3%) |
11 (1.4%) |
151 (8.7%) |
| Multicentre |
|
|
|
|
|
|
|
|
| No |
166 (64.1%) |
279 (49.9%) |
202 (71.9%) |
260 (46.6%) |
162 (73.0%) |
298 (47.9%) |
530 (69.6%) |
837 (48.1%) |
| Yes |
93 (35.9%) |
280 (50.1%) |
79 (28.1%) |
298 (53.4%) |
60 (27.0%) |
324 (52.1%) |
232 (30.4%) |
902 (51.9%) |
| Primary purpose |
|
|
|
|
|
|
|
|
| Other |
7 (2.7%) |
19 (3.4%) |
41 (14.6%) |
60 (10.8%) |
32 (14.4%) |
79 (12.7%) |
80 (10.5%) |
158 (9.1%) |
| Prevention |
36 (13.9%) |
85 (15.2%) |
103 (36.7%) |
165 (29.6%) |
23 (10.4%) |
65 (10.5%) |
162 (21.3%) |
315 (18.1%) |
| Treatment |
216 (83.4%) |
455 (81.4%) |
137 (48.8%) |
333 (59.7%) |
167 (75.2%) |
478 (76.8%) |
520 (68.2%) |
1266 (72.8%) |
| Sponsor type |
|
|
|
|
|
|
|
|
| Industry |
39 (15.1%) |
143 (25.6%) |
77 (27.4%) |
184 (33.0%) |
53 (23.9%) |
273 (43.9%) |
169 (22.2%) |
600 (34.5%) |
| Investigator |
8 (3.1%) |
44 (7.9%) |
11 (3.9%) |
24 (4.3%) |
7 (3.2%) |
35 (5.6%) |
26 (3.4%) |
103 (5.9%) |
| Non industry |
211 (81.5%) |
362 (64.8%) |
189 (67.3%) |
347 (62.2%) |
138 (62.2%) |
295 (47.4%) |
538 (70.6%) |
1004 (57.7%) |
| Missing |
1 (0.4%) |
10 (1.8%) |
4 (1.4%) |
3 (0.5%) |
24 (10.8%) |
19 (3.1%) |
29 (3.8%) |
32 (1.8%) |
| Sample size |
|
|
|
|
|
|
|
|
| Mean (SD) |
4.66 (1.31) |
4.93 (1.54) |
4.55 (1.37) |
5.08 (1.58) |
4.30 (1.25) |
4.58 (1.26) |
4.52 (1.32) |
4.85 (1.48) |
| Median [Min, Max] |
4.61 [0, 10.6] |
4.61 [1.79, 11.8] |
4.48 [0, 9.10] |
4.99 [0, 11.7] |
4.19 [1.10, 11.8] |
4.50 [0, 9.39] |
4.41 [0, 11.8] |
4.61 [0, 11.8] |
| Missing |
1 (0.4%) |
1 (0.2%) |
2 (0.7%) |
0 (0%) |
0 (0%) |
0 (0%) |
3 (0.4%) |
1 (0.1%) |
| Vaccine |
|
|
|
|
|
|
|
|
| No |
251 (96.9%) |
520 (93.0%) |
187 (66.5%) |
408 (73.1%) |
215 (96.8%) |
601 (96.6%) |
653 (85.7%) |
1529 (87.9%) |
| Yes |
8 (3.1%) |
39 (7.0%) |
94 (33.5%) |
150 (26.9%) |
7 (3.2%) |
21 (3.4%) |
109 (14.3%) |
210 (12.1%) |
| Conventional |
|
|
|
|
|
|
|
|
| No |
33 (12.7%) |
101 (18.1%) |
102 (36.3%) |
171 (30.6%) |
22 (9.9%) |
54 (8.7%) |
157 (20.6%) |
326 (18.7%) |
| Yes |
226 (87.3%) |
458 (81.9%) |
179 (63.7%) |
387 (69.4%) |
200 (90.1%) |
568 (91.3%) |
605 (79.4%) |
1413 (81.3%) |
| Traditional |
|
|
|
|
|
|
|
|
| No |
226 (87.3%) |
487 (87.1%) |
262 (93.2%) |
514 (92.1%) |
195 (87.8%) |
569 (91.5%) |
683 (89.6%) |
1570 (90.3%) |
| Yes |
33 (12.7%) |
72 (12.9%) |
19 (6.8%) |
44 (7.9%) |
27 (12.2%) |
53 (8.5%) |
79 (10.4%) |
169 (9.7%) |
| Subject blind |
|
|
|
|
|
|
|
|
| No |
139 (53.7%) |
285 (51.0%) |
122 (43.4%) |
196 (35.1%) |
120 (54.1%) |
296 (47.6%) |
381 (50.0%) |
777 (44.7%) |
| Yes |
46 (17.8%) |
169 (30.2%) |
73 (26.0%) |
195 (34.9%) |
45 (20.3%) |
130 (20.9%) |
164 (21.5%) |
494 (28.4%) |
| Missing |
74 (28.6%) |
105 (18.8%) |
86 (30.6%) |
167 (29.9%) |
57 (25.7%) |
196 (31.5%) |
217 (28.5%) |
468 (26.9%) |
| Caregiver blind |
|
|
|
|
|
|
|
|
| No |
151 (58.3%) |
357 (63.9%) |
155 (55.2%) |
265 (47.5%) |
138 (62.2%) |
362 (58.2%) |
444 (58.3%) |
984 (56.6%) |
| Yes |
34 (13.1%) |
97 (17.4%) |
40 (14.2%) |
126 (22.6%) |
25 (11.3%) |
62 (10.0%) |
99 (13.0%) |
285 (16.4%) |
| Missing |
74 (28.6%) |
105 (18.8%) |
86 (30.6%) |
167 (29.9%) |
59 (26.6%) |
198 (31.8%) |
219 (28.7%) |
470 (27.0%) |
| Investigator blind |
|
|
|
|
|
|
|
|
| No |
151 (58.3%) |
309 (55.3%) |
127 (45.2%) |
203 (36.4%) |
128 (57.7%) |
330 (53.1%) |
406 (53.3%) |
842 (48.4%) |
| Yes |
34 (13.1%) |
145 (25.9%) |
68 (24.2%) |
188 (33.7%) |
37 (16.7%) |
96 (15.4%) |
139 (18.2%) |
429 (24.7%) |
| Missing |
74 (28.6%) |
105 (18.8%) |
86 (30.6%) |
167 (29.9%) |
57 (25.7%) |
196 (31.5%) |
217 (28.5%) |
468 (26.9%) |
| Outcome blind |
|
|
|
|
|
|
|
|
| No |
160 (61.8%) |
357 (63.9%) |
142 (50.5%) |
259 (46.4%) |
135 (60.8%) |
350 (56.3%) |
437 (57.3%) |
966 (55.5%) |
| Yes |
25 (9.7%) |
97 (17.4%) |
54 (19.2%) |
135 (24.2%) |
29 (13.1%) |
74 (11.9%) |
108 (14.2%) |
306 (17.6%) |
| Missing |
74 (28.6%) |
105 (18.8%) |
85 (30.2%) |
164 (29.4%) |
58 (26.1%) |
198 (31.8%) |
217 (28.5%) |
467 (26.9%) |
| Analyst blind |
|
|
|
|
|
|
|
|
| No |
185 (71.4%) |
452 (80.9%) |
195 (69.4%) |
388 (69.5%) |
163 (73.4%) |
424 (68.2%) |
543 (71.3%) |
1264 (72.7%) |
| Yes |
0 (0%) |
2 (0.4%) |
0 (0%) |
3 (0.5%) |
0 (0%) |
0 (0%) |
0 (0%) |
5 (0.3%) |
| Missing |
74 (28.6%) |
105 (18.8%) |
86 (30.6%) |
167 (29.9%) |
59 (26.6%) |
198 (31.8%) |
219 (28.7%) |
470 (27.0%) |
Outcomes over time (analysis 3)
Covid trial characteristics over time
Results number 3. Trials with missing start date (23) are dropped.
# Restrict to the covid arm; the trend summary below uses covid trials only.
d_cov <- d %>%
  filter(study_arm == "covid")
# sum(is.na(d_cov$start_date))

# Per start month, the proportion of covid trials with each design feature.
# Trials without a start date cannot be assigned to a month, so they are
# removed explicitly before grouping instead of relying on the final
# complete-case filter to discard the NA month.
d_sum <- d_cov %>%
  filter(!is.na(start_date)) %>%
  group_by(month = lubridate::floor_date(start_date, "month")) %>%
  summarize(
    Controlled = sum(control_arm == "Yes") / n(),
    Randomised = sum(randomisation == "Yes") / n(),
    Blinded = sum(blinding == "Yes") / n(),
    Prospective = sum(prospective == "Yes") / n()
  ) %>%
  # Long format (one row per month and outcome) for plotting. pivot_longer()
  # replaces the superseded gather() and names the key column directly.
  pivot_longer(-month, names_to = "Variable", values_to = "value") %>%
  # An NA in an outcome makes that month's proportion NA; drop such rows.
  filter(complete.cases(.))
## `summarise()` ungrouping output (override with `.groups` argument)
# d_sum <- d_sum %>%
# filter(month > "2019-01-01" & month < "2020-01-01")
# One line per outcome (distinguished by colour and line type) showing the
# monthly proportion; the y axis is fixed to the full 0-1 range.
ggplot(
  data = d_sum,
  mapping = aes(x = month, y = value, color = Variable, linetype = Variable)
) +
  geom_line() +
  ylim(0, 1) +
  labs(
    x = "Start date (grouped by month)",
    y = "Proportion"
  ) +
  theme_classic() +
  theme(legend.title = element_blank())

Direct effect (analyses 4 and 5)
Complete case analysis
For each outcome we fit a logistic regression model with adjustment for all covariates, not including the other outcomes.
# Outcome variables: one logistic model is fitted per entry.
myvars <- c("control_arm", "randomisation", "blinding", "prospective")

# Adjustment set for the main dataset: covid plus every column from
# source_registry through traditional.
main_direct_adjustment <- names(
  select(main_dataset, covid, source_registry:traditional)
)

# Adjustment set for the indication-matched models.
# originally this was the same as main, but there were problems with
# convergence, so intervention variables are dropped
# NOTE(review): the column names are taken from main_dataset, which assumes
# indication_dataset carries the same columns - confirm.
indication_direct_adjustment <- names(
  select(
    select(main_dataset, covid, source_registry:traditional),
    -c(vaccine, conventional, traditional)
  )
)

# Fit outcome ~ <all adjustment variables> by logistic regression, once per
# outcome, on the main dataset. The result is an unnamed list in the order
# of myvars.
main_direct_models <- lapply(myvars, function(outcome) {
  glm(
    reformulate(main_direct_adjustment, response = outcome),
    family = binomial(link = "logit"),
    data = main_dataset
  )
})
For the main dataset we adjust for covid, source_registry, phase_clean, region_Africa, region_N_America, region_L_America, region_Asia, region_Europe, region_Oceania, multicentre, primary_purpose, sponsor_type, sample_size, vaccine, conventional and traditional. Adjusting for source_registry with all of its categories led to convergence problems, so its infrequent categories were grouped into "Other" rather than the variable being excluded. For the indication-matched dataset the intervention variables (vaccine, conventional and traditional) were dropped because of convergence problems.
Main dataset
# Tabulate the four logistic models fitted on the main dataset (one per
# outcome in myvars) side by side.
sjPlot::tab_model(main_direct_models)
|
|
Control arm
|
randomisation
|
Blinding
|
Prospective registration
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.71
|
0.10 – 4.43
|
0.715
|
0.38
|
0.08 – 1.68
|
0.207
|
0.37
|
0.13 – 1.05
|
0.063
|
2.13
|
0.68 – 6.67
|
0.194
|
|
covidTRUE
|
1.91
|
1.38 – 2.65
|
<0.001
|
1.69
|
1.26 – 2.27
|
<0.001
|
0.73
|
0.57 – 0.92
|
0.008
|
0.81
|
0.62 – 1.05
|
0.106
|
|
Source registry: CT.gov
|
0.24
|
0.09 – 0.57
|
0.003
|
0.44
|
0.21 – 0.87
|
0.024
|
1.60
|
0.96 – 2.69
|
0.071
|
0.80
|
0.48 – 1.31
|
0.374
|
|
Source registry: CTRI
|
0.21
|
0.07 – 0.55
|
0.003
|
0.37
|
0.16 – 0.81
|
0.016
|
1.04
|
0.58 – 1.89
|
0.884
|
32.16
|
9.22 – 203.94
|
<0.001
|
|
Source registry: EUCTR
|
0.09
|
0.03 – 0.26
|
<0.001
|
0.29
|
0.12 – 0.67
|
0.004
|
1.32
|
0.70 – 2.50
|
0.390
|
0.91
|
0.46 – 1.78
|
0.777
|
|
Source registry: IRCT
|
0.54
|
0.17 – 1.55
|
0.269
|
0.59
|
0.26 – 1.32
|
0.210
|
3.16
|
1.79 – 5.63
|
<0.001
|
0.52
|
0.30 – 0.91
|
0.023
|
|
Source registry: JPRN
|
0.28
|
0.06 – 1.34
|
0.096
|
0.45
|
0.12 – 1.78
|
0.233
|
2.52
|
0.86 – 7.74
|
0.095
|
1.24
|
0.39 – 4.82
|
0.731
|
|
Source registry: Other
|
0.22
|
0.07 – 0.62
|
0.006
|
0.49
|
0.20 – 1.16
|
0.108
|
1.44
|
0.77 – 2.73
|
0.256
|
0.58
|
0.31 – 1.11
|
0.100
|
|
Phase: Phase 2
|
1.03
|
0.64 – 1.64
|
0.905
|
1.03
|
0.67 – 1.59
|
0.878
|
1.15
|
0.78 – 1.70
|
0.477
|
1.35
|
0.89 – 2.06
|
0.159
|
|
Phase: Phase 3
|
2.34
|
1.29 – 4.28
|
0.005
|
2.46
|
1.44 – 4.22
|
0.001
|
1.29
|
0.84 – 1.98
|
0.249
|
0.94
|
0.59 – 1.50
|
0.806
|
|
Phase: Phase 4
|
1.48
|
0.78 – 2.83
|
0.234
|
1.97
|
1.10 – 3.62
|
0.025
|
0.92
|
0.57 – 1.48
|
0.739
|
1.09
|
0.66 – 1.81
|
0.737
|
|
Phase: Undefined
|
0.98
|
0.55 – 1.76
|
0.959
|
1.05
|
0.63 – 1.78
|
0.845
|
1.53
|
0.97 – 2.39
|
0.065
|
0.95
|
0.59 – 1.54
|
0.845
|
|
Africa: Yes
|
1.69
|
0.80 – 4.01
|
0.197
|
1.18
|
0.63 – 2.39
|
0.619
|
1.03
|
0.66 – 1.61
|
0.901
|
1.79
|
1.03 – 3.25
|
0.048
|
|
North America: Yes
|
0.55
|
0.37 – 0.83
|
0.004
|
0.43
|
0.29 – 0.63
|
<0.001
|
0.80
|
0.59 – 1.08
|
0.150
|
2.02
|
1.39 – 3.00
|
<0.001
|
|
Latin America: Yes
|
1.11
|
0.65 – 1.98
|
0.702
|
1.48
|
0.88 – 2.57
|
0.152
|
1.88
|
1.31 – 2.73
|
0.001
|
0.76
|
0.49 – 1.19
|
0.218
|
|
Asia: Yes
|
0.62
|
0.41 – 0.93
|
0.020
|
0.63
|
0.43 – 0.93
|
0.020
|
0.69
|
0.51 – 0.94
|
0.018
|
1.08
|
0.74 – 1.61
|
0.709
|
|
Europe: Yes
|
0.99
|
0.61 – 1.64
|
0.970
|
0.69
|
0.44 – 1.09
|
0.113
|
0.67
|
0.47 – 0.95
|
0.025
|
1.27
|
0.83 – 1.98
|
0.280
|
|
Oceania: Yes
|
0.83
|
0.44 – 1.63
|
0.586
|
0.73
|
0.40 – 1.37
|
0.324
|
0.99
|
0.61 – 1.62
|
0.975
|
16.37
|
4.87 – 102.36
|
<0.001
|
|
Multicentre: Yes
|
0.75
|
0.52 – 1.10
|
0.139
|
0.85
|
0.60 – 1.19
|
0.338
|
0.99
|
0.75 – 1.29
|
0.918
|
1.48
|
1.10 – 1.98
|
0.009
|
Primary purpose: Prevention
|
0.78
|
0.35 – 1.75
|
0.534
|
0.65
|
0.33 – 1.28
|
0.209
|
1.48
|
0.88 – 2.49
|
0.140
|
0.80
|
0.44 – 1.43
|
0.446
|
Primary purpose: Treatment
|
0.48
|
0.26 – 0.84
|
0.013
|
0.57
|
0.34 – 0.94
|
0.033
|
0.73
|
0.48 – 1.10
|
0.137
|
0.89
|
0.55 – 1.40
|
0.609
|
Sponsor type: Investigator
|
0.65
|
0.34 – 1.26
|
0.196
|
0.73
|
0.40 – 1.36
|
0.311
|
0.87
|
0.53 – 1.44
|
0.592
|
1.35
|
0.71 – 2.68
|
0.378
|
Sponsor type: Non industry
|
0.70
|
0.47 – 1.03
|
0.073
|
0.67
|
0.47 – 0.96
|
0.032
|
0.54
|
0.40 – 0.73
|
<0.001
|
0.84
|
0.60 – 1.16
|
0.291
|
|
Sample size
|
2.32
|
1.97 – 2.74
|
<0.001
|
2.29
|
1.97 – 2.67
|
<0.001
|
1.37
|
1.24 – 1.52
|
<0.001
|
1.09
|
0.97 – 1.21
|
0.142
|
|
Vaccine: Yes
|
0.67
|
0.18 – 2.90
|
0.570
|
0.35
|
0.11 – 1.17
|
0.081
|
0.77
|
0.33 – 1.80
|
0.553
|
1.38
|
0.53 – 3.86
|
0.523
|
|
Conventional: Yes
|
1.77
|
0.51 – 7.65
|
0.402
|
1.25
|
0.45 – 3.88
|
0.676
|
0.80
|
0.40 – 1.58
|
0.515
|
0.71
|
0.33 – 1.52
|
0.373
|
|
Traditional: Yes
|
3.73
|
1.14 – 16.25
|
0.049
|
2.37
|
0.93 – 7.13
|
0.093
|
1.18
|
0.62 – 2.24
|
0.601
|
0.72
|
0.37 – 1.43
|
0.347
|
|
Observations
|
1588
|
1588
|
1588
|
1588
|
|
R2 Tjur
|
0.215
|
0.228
|
0.103
|
0.142
|
# Coefficient plot for the main-dataset models, with a reference line at 1
# added on the estimate axis.
sjPlot::plot_models(
  main_direct_models,
  prefix.labels = "varname"
) +
  geom_hline(yintercept = 1) +
  theme_bw()

Indication-matched dataset
We repeat the same analyses on the indication-matched dataset.
# Logistic regression for each outcome on the indication-matched dataset,
# using the indication adjustment set. List elements are named
# "<outcome>_(direct_indication)".
indication_direct_models <- setNames(
  lapply(myvars, function(outcome) {
    glm(
      reformulate(indication_direct_adjustment, response = outcome),
      family = binomial(link = "logit"),
      data = indication_dataset
    )
  }),
  paste0(myvars, "_(direct_indication)")
)
# lapply(indication_direct_models, summary)
sjPlot::tab_model(indication_direct_models)
|
|
Control arm
|
randomisation
|
Blinding
|
Prospective registration
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.67
|
0.18 – 2.68
|
0.559
|
0.31
|
0.10 – 0.94
|
0.038
|
0.21
|
0.09 – 0.49
|
<0.001
|
0.46
|
0.20 – 1.06
|
0.067
|
|
covidTRUE
|
0.46
|
0.30 – 0.68
|
<0.001
|
0.49
|
0.35 – 0.68
|
<0.001
|
0.38
|
0.30 – 0.49
|
<0.001
|
1.02
|
0.79 – 1.31
|
0.894
|
|
Source registry: CT.gov
|
0.27
|
0.09 – 0.68
|
0.008
|
0.53
|
0.26 – 1.07
|
0.082
|
2.22
|
1.28 – 3.94
|
0.005
|
1.17
|
0.70 – 1.95
|
0.544
|
|
Source registry: CTRI
|
0.38
|
0.12 – 1.12
|
0.086
|
0.79
|
0.33 – 1.89
|
0.590
|
1.05
|
0.53 – 2.09
|
0.879
|
9.80
|
4.19 – 25.99
|
<0.001
|
|
Source registry: EUCTR
|
0.11
|
0.03 – 0.32
|
<0.001
|
0.37
|
0.15 – 0.89
|
0.029
|
1.30
|
0.66 – 2.58
|
0.452
|
1.34
|
0.69 – 2.60
|
0.381
|
|
Source registry: IRCT
|
0.56
|
0.18 – 1.67
|
0.312
|
0.74
|
0.33 – 1.63
|
0.460
|
3.02
|
1.65 – 5.62
|
<0.001
|
0.44
|
0.25 – 0.76
|
0.004
|
|
Source registry: JPRN
|
0.05
|
0.01 – 0.18
|
<0.001
|
0.12
|
0.04 – 0.34
|
<0.001
|
1.37
|
0.54 – 3.53
|
0.516
|
0.85
|
0.35 – 2.07
|
0.715
|
|
Source registry: Other
|
0.27
|
0.08 – 0.89
|
0.033
|
0.66
|
0.25 – 1.79
|
0.408
|
1.04
|
0.51 – 2.15
|
0.911
|
0.95
|
0.47 – 1.90
|
0.877
|
|
Phase: Phase 2
|
2.40
|
1.40 – 4.12
|
0.001
|
2.25
|
1.40 – 3.60
|
0.001
|
1.63
|
1.09 – 2.46
|
0.019
|
1.36
|
0.90 – 2.07
|
0.147
|
|
Phase: Phase 3
|
2.37
|
1.27 – 4.48
|
0.007
|
2.29
|
1.33 – 3.96
|
0.003
|
1.13
|
0.73 – 1.75
|
0.597
|
1.08
|
0.69 – 1.69
|
0.748
|
|
Phase: Phase 4
|
1.17
|
0.64 – 2.15
|
0.608
|
1.08
|
0.64 – 1.82
|
0.777
|
0.63
|
0.40 – 1.00
|
0.048
|
0.94
|
0.59 – 1.48
|
0.779
|
|
Phase: Undefined
|
0.92
|
0.50 – 1.72
|
0.804
|
0.97
|
0.56 – 1.67
|
0.913
|
0.94
|
0.58 – 1.52
|
0.808
|
0.83
|
0.51 – 1.34
|
0.448
|
|
Africa: Yes
|
1.85
|
0.77 – 5.54
|
0.213
|
2.22
|
0.99 – 5.96
|
0.075
|
1.42
|
0.91 – 2.27
|
0.132
|
1.70
|
1.03 – 2.92
|
0.045
|
|
North America: Yes
|
0.80
|
0.49 – 1.33
|
0.384
|
0.58
|
0.38 – 0.92
|
0.018
|
0.78
|
0.57 – 1.07
|
0.119
|
1.90
|
1.35 – 2.72
|
<0.001
|
|
Latin America: Yes
|
1.06
|
0.55 – 2.18
|
0.863
|
1.13
|
0.63 – 2.14
|
0.700
|
1.74
|
1.17 – 2.62
|
0.007
|
0.71
|
0.47 – 1.08
|
0.100
|
|
Asia: Yes
|
0.86
|
0.51 – 1.48
|
0.580
|
0.87
|
0.55 – 1.40
|
0.554
|
0.88
|
0.63 – 1.22
|
0.429
|
0.99
|
0.70 – 1.42
|
0.977
|
|
Europe: Yes
|
0.93
|
0.53 – 1.66
|
0.800
|
0.79
|
0.48 – 1.30
|
0.341
|
0.84
|
0.59 – 1.18
|
0.299
|
1.22
|
0.84 – 1.78
|
0.301
|
|
Oceania: Yes
|
1.25
|
0.49 – 3.74
|
0.658
|
1.22
|
0.52 – 3.16
|
0.664
|
1.70
|
0.93 – 3.23
|
0.095
|
3.63
|
1.72 – 8.53
|
0.001
|
|
Multicentre: Yes
|
1.17
|
0.77 – 1.80
|
0.470
|
1.41
|
0.99 – 2.04
|
0.060
|
1.25
|
0.97 – 1.62
|
0.090
|
1.57
|
1.20 – 2.06
|
0.001
|
Primary purpose: Prevention
|
1.33
|
0.71 – 2.49
|
0.368
|
1.08
|
0.63 – 1.85
|
0.767
|
1.67
|
1.04 – 2.72
|
0.036
|
0.75
|
0.46 – 1.22
|
0.248
|
Primary purpose: Treatment
|
2.36
|
1.35 – 4.06
|
0.002
|
2.63
|
1.62 – 4.24
|
<0.001
|
1.47
|
0.95 – 2.28
|
0.085
|
1.19
|
0.77 – 1.84
|
0.437
|
Sponsor type: Investigator
|
1.06
|
0.48 – 2.44
|
0.896
|
1.00
|
0.49 – 2.11
|
0.994
|
0.98
|
0.57 – 1.69
|
0.944
|
1.32
|
0.73 – 2.45
|
0.369
|
Sponsor type: Non industry
|
1.17
|
0.74 – 1.82
|
0.503
|
0.96
|
0.64 – 1.42
|
0.837
|
0.56
|
0.41 – 0.75
|
<0.001
|
1.12
|
0.82 – 1.53
|
0.469
|
|
Sample size
|
1.99
|
1.69 – 2.37
|
<0.001
|
1.85
|
1.61 – 2.14
|
<0.001
|
1.36
|
1.24 – 1.50
|
<0.001
|
1.16
|
1.06 – 1.28
|
0.002
|
|
Observations
|
1610
|
1610
|
1610
|
1610
|
|
R2 Tjur
|
0.148
|
0.172
|
0.163
|
0.136
|
# Coefficient plot for the indication-matched models, with a reference line
# at 1 added on the estimate axis.
sjPlot::plot_models(
  indication_direct_models,
  prefix.labels = "varname" # prefix.labels = "label"
) +
  geom_hline(yintercept = 1) +
  theme_bw()

Multiple imputation
Phase, sample size (for 3 trials) and sponsor type have missing values.
Main dataset
# Derive start_year as the numeric value of the first four characters of
# start_date.
main_dataset <- main_dataset %>%
  mutate(start_year = as.numeric(substr(as.character(start_date), 1, 4)))
# Imputation input: everything except the trial identifier, the raw start
# date and the study arm.
main_for_mice <- select(main_dataset, -c(TrialID, start_date, study_arm))
# Show the class of every column that goes into the imputation.
map(main_for_mice, class)
## $control_arm
## [1] "factor"
##
## $randomisation
## [1] "factor"
##
## $blinding
## [1] "factor"
##
## $prospective
## [1] "factor"
##
## $source_registry
## [1] "factor"
##
## $phase_clean
## [1] "factor"
##
## $region_Africa
## [1] "factor"
##
## $region_N_America
## [1] "factor"
##
## $region_L_America
## [1] "factor"
##
## $region_Asia
## [1] "factor"
##
## $region_Europe
## [1] "factor"
##
## $region_Oceania
## [1] "factor"
##
## $multicentre
## [1] "factor"
##
## $primary_purpose
## [1] "factor"
##
## $sponsor_type
## [1] "factor"
##
## $sample_size
## [1] "numeric"
##
## $vaccine
## [1] "factor"
##
## $conventional
## [1] "factor"
##
## $traditional
## [1] "factor"
##
## $subject_blind
## [1] "factor"
##
## $caregiver_blind
## [1] "factor"
##
## $investigator_blind
## [1] "factor"
##
## $outcome_blind
## [1] "factor"
##
## $analyst_blind
## [1] "factor"
##
## $covid
## [1] "logical"
##
## $start_year
## [1] "numeric"
Use the mice command to generate an initial predictor matrix (indicating which column variables are used to predict the row variables) without making any imputations.
set.seed(5)
# Dry run (maxit = 0): sets up the default imputation model without making
# any imputations, so that its predictor matrix can be reused below.
mice_in <- mice::mice(main_for_mice, maxit = 0, printFlag = FALSE)
# Use the full element name instead of relying on partial `$` matching.
predictor_matrix <- mice_in$predictorMatrix
# Pairwise correlations (rounded to 2 decimals) between the outcomes and the
# adjustment variables, after coercing every column with as.numeric().
correlation_matrix <- round(
  cor(
    vapply(
      main_for_mice[, c(myvars, main_direct_adjustment)],
      as.numeric,
      numeric(nrow(main_for_mice))
    ),
    use = "pairwise.complete.obs"
  ),
  2
)
# keep only upper triangle
correlation_matrix[lower.tri(correlation_matrix)] <- NA
melted_correlation_matrix <- reshape2::melt(correlation_matrix)
# reverse the level order of Var2 so the y axis runs in the opposite
# direction to the x axis
melted_correlation_matrix$Var2 <- factor(
  melted_correlation_matrix$Var2,
  levels = rev(levels(melted_correlation_matrix$Var2)),
  ordered = TRUE
)
# axis labels: variable names with underscores removed, named by the
# original level so the scales can match them to their breaks
labels_plot <- vapply(
  levels(melted_correlation_matrix[, 1]),
  function(x) gsub("_", "", x),
  character(1)
)
ggplot(
  data = melted_correlation_matrix,
  aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile(color = "white") +
  xlab("") +
  ylab("") +
  scale_fill_gradient2(
    low = "darkorchid4",
    high = "green3",
    mid = "white",
    midpoint = 0,
    na.value = "white",
    limits = c(-1, 1),
    name = "Correlation"
  ) +
  scale_x_discrete(labels = labels_plot) +
  scale_y_discrete(labels = rev(labels_plot)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.7, 0.7),
    legend.direction = "horizontal"
  ) +
  guides(
    fill = guide_colorbar(
      barwidth = 7,
      barheight = 1,
      title.position = "top",
      title.hjust = 0.5
    )
  )

# quickpred: quick selection procedure of predictors
# select predictors according to data relations with a minimum correlation of ρ = 0.25
# (argument and element names are spelled out instead of partially matched)
mice_in2 <- mice::mice(
  main_for_mice,
  predictorMatrix = mice::quickpred(main_for_mice, mincor = 0.25),
  printFlag = FALSE
)
mice_in2$predictorMatrix
Modify the predictor matrix such that only phase, sample size and sponsor type are imputed, using all variables.
# Use covid as a predictor for every row variable. Its own row needs no
# special handling because it is cleared with the other non-target rows below.
predictor_matrix[, "covid"] <- 1
# Rows are the variables being predicted: clear every row except the three
# variables that are to be imputed.
predictor_matrix[
  setdiff(
    row.names(predictor_matrix),
    c("phase_clean", "sample_size", "sponsor_type")
  ),
] <- 0
Generate 10 imputed datasets using chained equations (using package mice).
# First run with m = 10 imputations and the restricted predictor matrix.
# Argument names are written in full rather than partially matched.
main_mice <- mice::mice(
  main_for_mice,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 150
# List the component names and the class of the object returned by mice().
attributes(main_mice)
## $names
## [1] "data" "imp" "m" "where"
## [5] "blocks" "call" "nmis" "method"
## [9] "predictorMatrix" "visitSequence" "formulas" "post"
## [13] "blots" "ignore" "seed" "iteration"
## [17] "lastSeedValue" "chainMean" "chainVar" "loggedEvents"
## [21] "version" "date"
##
## $class
## [1] "mids"
Original data:
# first rows of the data stored in the mice result
head(main_mice$data)
Imputed datasets:
# first rows of the imputations stored for each variable
# (one column per imputed dataset)
map(main_mice$imp, head)
## $control_arm
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $randomisation
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $blinding
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $prospective
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $source_registry
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $phase_clean
## 1 2 3 4 5 6 7 8 9
## 758 Phase 2 Phase 2 Phase 2 Phase 3 Phase 2 Phase 2 Phase 2 Phase 2 Phase 2
## 775 Phase 2 Phase 3 Phase 3 Phase 4 Phase 3 Phase 2 Phase 2 Phase 3 Phase 2
## 790 Phase 2 Phase 4 Undefined Phase 1 Phase 1 Phase 4 Phase 1 Phase 4 Phase 1
## 800 Phase 3 Phase 3 Phase 3 Phase 2 Phase 4 Phase 2 Phase 2 Phase 4 Phase 2
## 802 Phase 4 Phase 2 Phase 2 Phase 3 Phase 3 Undefined Phase 3 Phase 4 Phase 4
## 809 Phase 3 Phase 2 Phase 3 Phase 3 Phase 2 Phase 2 Phase 3 Phase 3 Phase 2
## 10
## 758 Phase 3
## 775 Phase 3
## 790 Phase 2
## 800 Phase 2
## 802 Phase 3
## 809 Phase 2
##
## $region_Africa
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_N_America
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_L_America
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Asia
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Europe
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Oceania
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $multicentre
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $primary_purpose
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $sponsor_type
## 1 2 3 4 5
## 133 Non industry Non industry Non industry Non industry Non industry
## 145 Investigator Non industry Non industry Industry Investigator
## 227 Investigator Industry Industry Non industry Non industry
## 272 Non industry Non industry Non industry Non industry Non industry
## 765 Non industry Industry Non industry Non industry Non industry
## 766 Non industry Non industry Non industry Industry Non industry
## 6 7 8 9 10
## 133 Non industry Non industry Non industry Non industry Non industry
## 145 Investigator Investigator Investigator Non industry Non industry
## 227 Non industry Industry Investigator Non industry Non industry
## 272 Industry Investigator Investigator Non industry Non industry
## 765 Non industry Non industry Non industry Non industry Industry
## 766 Non industry Non industry Non industry Non industry Non industry
##
## $sample_size
## 1 2 3 4 5 6 7 8
## 1646 4.094345 6.461468 3.912023 5.298317 2.995732 4.094345 5.023881 5.010635
## 1648 4.382027 4.787492 4.060443 4.094345 5.298317 5.075174 7.600902 5.298317
## 9 10
## 1646 4.317488 3.912023
## 1648 5.010635 6.907755
##
## $vaccine
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $conventional
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $traditional
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $subject_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No No No Yes No No No Yes No
## 2 Yes No No No No Yes Yes No No No
## 3 Yes No Yes No Yes No No Yes No No
## 6 No No Yes No No Yes No No No No
## 8 Yes No No Yes No Yes Yes Yes No No
## 10 Yes No No Yes No No Yes No No No
##
## $caregiver_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No No No No No No No No No
## 2 No No No Yes No Yes No No No Yes
## 3 No No No No No No No No No No
## 6 No No No No Yes No Yes No No No
## 8 No No No No No No Yes Yes No No
## 10 No No No No No No No No No No
##
## $investigator_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No No Yes No No No No No No
## 2 No Yes No No No No No Yes Yes No
## 3 No No No No Yes No No Yes No No
## 6 No Yes Yes No No No No No Yes No
## 8 Yes Yes No Yes No No Yes Yes No No
## 10 No Yes Yes No No No Yes No Yes Yes
##
## $outcome_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 Yes Yes No No No No No Yes Yes No
## 2 No No No No No No No No No No
## 3 No No No Yes No Yes Yes No No No
## 6 No No No No No Yes No No No Yes
## 8 No No No No No No No No No Yes
## 10 No No No No No No No No No No
##
## $analyst_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No No No No No No No No No
## 2 No No No No No No No No No No
## 3 No No No No No No No No No No
## 6 No No No No No No No No No No
## 8 No No No No No No No No No No
## 10 No No No No No No No No No No
##
## $covid
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $start_year
## 1 2 3 4 5 6 7 8 9 10
## 32 2018 2019 2020 2020 2019 2019 2019 2020 2019 2019
## 34 2018 2020 2020 2018 2020 2020 2019 2020 2019 2019
## 35 2020 2020 2020 2020 2019 2019 2019 2020 2020 2019
## 48 2020 2019 2020 2020 2018 2020 2020 2020 2019 2019
## 60 2020 2019 2020 2020 2020 2019 2019 2020 2019 2019
## 66 2020 2020 2019 2018 2019 2019 2020 2020 2019 2019
# In order to get the third imputed data set, use the complete() function
Only impute the three variables listed above.
# Start from the methods chosen in the previous run and blank out ("") the
# method of every variable other than the three to be imputed. Selecting by
# name keeps this correct if columns are added, removed or reordered,
# unlike a hard-coded start position.
method_vector <- main_mice$method
method_vector[
  !(names(method_vector) %in% c("phase_clean", "sample_size", "sponsor_type"))
] <- ""
Generate 10 imputed datasets using the updated method vector.
# Re-run with m = 10 imputations, now passing the updated method vector.
# Argument names are written in full rather than partially matched.
main_mice <- mice::mice(
  main_for_mice,
  method = method_vector,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 150
# Plot the imputation run; the same plot is repeated below after more iterations.
plot(main_mice)

Check that there is no trend with further iterations and that the lines mix.
# Continue the existing run with maxit = 40 and plot the result
# (printFlag spelled out instead of the partial `print`).
main_mice_40 <- mice::mice.mids(main_mice, maxit = 40, printFlag = FALSE)
plot(main_mice_40)

Plot of observed (blue) and imputed (red) phase:
# Strip plot of phase_clean, one panel position per imputation number (.imp).
mice::stripplot(main_mice, phase_clean ~ .imp, pch = 20, cex = 2)

# Under MCAR, univariate distributions of the observed and imputed data are expected to be identical. Under MAR, they can be different, both in location and spread, but their multivariate distribution is assumed to be identical.
# Called without a formula, so the plot is not restricted to one variable.
mice::stripplot(main_mice)

Analysis
Logistic regression analysis on the multiply imputed data.
# For each outcome in myvars: fit the adjusted logistic regression on every
# imputed dataset, pool the fits, and keep both under the outcome's name.
main_direct_models_mice <- lapply(setNames(myvars, myvars), function(x) {
  fit <- with(
    main_mice,
    glm(
      as.formula(paste(x, "~", paste(main_direct_adjustment, collapse = "+"))),
      family = binomial(link = "logit")
    )
  )
  list("fit" = fit, "pooled_fit" = mice::pool(fit))
})
# pooled coefficient tables, one per outcome
lapply(main_direct_models_mice, function(x) summary(x$pooled_fit))
## $control_arm
## term estimate std.error statistic df
## 1 (Intercept) -0.323572308 0.92959121 -0.34808022 1598.4373
## 2 covidTRUE 0.618259500 0.16237967 3.80749322 1597.0008
## 3 source_registryCT.gov -1.411159424 0.47728743 -2.95662389 1601.4857
## 4 source_registryCTRI -1.533922309 0.52344811 -2.93041906 1598.0291
## 5 source_registryEUCTR -2.350422124 0.54254335 -4.33222919 1601.3571
## 6 source_registryIRCT -0.596825348 0.55017172 -1.08479829 1598.7801
## 7 source_registryJPRN -2.075894604 0.57411290 -3.61582993 1596.2357
## 8 source_registryOther -1.612302584 0.53839782 -2.99463061 1601.5954
## 9 phase_cleanPhase 2 0.003202403 0.23500209 0.01362713 1587.2581
## 10 phase_cleanPhase 3 0.846821180 0.30287209 2.79596968 1403.0430
## 11 phase_cleanPhase 4 0.481981354 0.32549989 1.48074197 1519.0704
## 12 phase_cleanUndefined -0.054094868 0.29130187 -0.18570038 1591.3714
## 13 region_AfricaYes 0.569047966 0.40422593 1.40774730 1601.4203
## 14 region_N_AmericaYes -0.566042714 0.20440011 -2.76928766 1589.4581
## 15 region_L_AmericaYes 0.026915363 0.27384848 0.09828560 1596.2130
## 16 region_AsiaYes -0.456541548 0.20618992 -2.21417977 1585.4265
## 17 region_EuropeYes 0.035887997 0.24776273 0.14484825 1597.8287
## 18 region_OceaniaYes -0.124661350 0.33039352 -0.37731173 1600.4312
## 19 multicentreYes -0.353441145 0.18536918 -1.90668779 1589.0634
## 20 primary_purposePrevention -0.427248647 0.38472816 -1.11052087 1600.1633
## 21 primary_purposeTreatment -0.688233028 0.28845954 -2.38589106 1600.5377
## 22 sponsor_typeInvestigator -0.449343969 0.33296184 -1.34953595 356.8947
## 23 sponsor_typeNon industry -0.369521068 0.19801429 -1.86613332 1290.4162
## 24 sample_size 0.834435791 0.08238432 10.12857553 1560.1915
## 25 vaccineYes -0.274335663 0.67531644 -0.40623276 1600.4876
## 26 conventionalYes 0.543418138 0.66247970 0.82027893 1599.6400
## 27 traditionalYes 1.327909251 0.65466071 2.02839309 1601.1526
## p.value
## 1 7.278258e-01
## 2 1.456916e-04
## 3 3.155793e-03
## 4 3.433111e-03
## 5 1.567791e-05
## 6 2.781746e-01
## 7 3.086927e-04
## 8 2.789913e-03
## 9 9.891292e-01
## 10 5.244985e-03
## 11 1.388827e-01
## 12 8.527034e-01
## 13 1.594000e-01
## 14 5.683192e-03
## 15 9.217179e-01
## 16 2.695823e-02
## 17 8.848490e-01
## 18 7.059920e-01
## 19 5.674123e-02
## 20 2.669415e-01
## 21 1.715347e-02
## 22 1.780203e-01
## 23 6.224943e-02
## 24 0.000000e+00
## 25 6.846259e-01
## 26 4.121793e-01
## 27 4.268538e-02
##
## $randomisation
## term estimate std.error statistic df
## 1 (Intercept) -0.77791775 0.7431457 -1.04679031 1594.4882
## 2 covidTRUE 0.47299023 0.1468621 3.22064174 1597.3922
## 3 source_registryCT.gov -0.79706700 0.3647786 -2.18507081 1600.9901
## 4 source_registryCTRI -0.97688208 0.4114559 -2.37420821 1594.3545
## 5 source_registryEUCTR -1.19851986 0.4346808 -2.75724163 1601.0225
## 6 source_registryIRCT -0.51295104 0.4147741 -1.23669966 1596.6432
## 7 source_registryJPRN -1.37094423 0.4820941 -2.84372718 1586.5883
## 8 source_registryOther -0.94062843 0.4356893 -2.15894330 1601.5228
## 9 phase_cleanPhase 2 0.01687318 0.2178745 0.07744450 1588.3124
## 10 phase_cleanPhase 3 0.90141181 0.2729890 3.30200796 1441.7912
## 11 phase_cleanPhase 4 0.76078839 0.3017254 2.52145979 1549.7167
## 12 phase_cleanUndefined 0.03308733 0.2629163 0.12584740 1593.2046
## 13 region_AfricaYes 0.20580646 0.3378080 0.60924100 1601.2102
## 14 region_N_AmericaYes -0.83568882 0.1935904 -4.31678934 1590.1850
## 15 region_L_AmericaYes 0.31546991 0.2635272 1.19710583 1598.8803
## 16 region_AsiaYes -0.44884986 0.1951778 -2.29969779 1592.2280
## 17 region_EuropeYes -0.39133976 0.2249493 -1.73967957 1597.9718
## 18 region_OceaniaYes -0.22094743 0.3120843 -0.70797352 1600.6668
## 19 multicentreYes -0.22546972 0.1698297 -1.32762224 1593.0547
## 20 primary_purposePrevention -0.52012954 0.3331818 -1.56109818 1600.0354
## 21 primary_purposeTreatment -0.47155619 0.2543431 -1.85401597 1598.5342
## 22 sponsor_typeInvestigator -0.34023255 0.3147573 -1.08093613 384.8753
## 23 sponsor_typeNon industry -0.41199108 0.1830437 -2.25077954 1357.6037
## 24 sample_size 0.82554355 0.0760372 10.85710091 1574.3164
## 25 vaccineYes -1.11869495 0.5581184 -2.00440426 1600.6558
## 26 conventionalYes 0.01356544 0.5116842 0.02651135 1600.1972
## 27 traditionalYes 0.74026928 0.4858899 1.52353299 1601.2040
## p.value
## 1 2.953550e-01
## 2 1.304768e-03
## 3 2.902819e-02
## 4 1.770445e-02
## 5 5.895330e-03
## 6 2.163805e-01
## 7 4.516075e-03
## 8 3.100268e-02
## 9 9.382797e-01
## 10 9.834478e-04
## 11 1.178648e-02
## 12 8.998686e-01
## 13 5.424511e-01
## 14 1.680856e-05
## 15 2.314428e-01
## 16 2.159420e-02
## 17 8.210784e-02
## 18 4.790648e-01
## 19 1.844931e-01
## 20 1.186983e-01
## 21 6.392096e-02
## 22 2.804024e-01
## 23 2.455897e-02
## 24 0.000000e+00
## 25 4.519496e-02
## 26 9.788528e-01
## 27 1.278229e-01
##
## $blinding
## term estimate std.error statistic df
## 1 (Intercept) -0.98388666 0.52496003 -1.87421252 1600.575
## 2 covidTRUE -0.32288357 0.11998295 -2.69107889 1601.841
## 3 source_registryCT.gov 0.48370794 0.26082150 1.85455545 1601.712
## 4 source_registryCTRI 0.07103238 0.29808919 0.23829237 1599.308
## 5 source_registryEUCTR 0.27391694 0.32307750 0.84783664 1601.780
## 6 source_registryIRCT 1.15731317 0.29065028 3.98180649 1601.576
## 7 source_registryJPRN -0.78976801 0.42264922 -1.86861343 1599.118
## 8 source_registryOther 0.28480634 0.32120703 0.88667530 1601.793
## 9 phase_cleanPhase 2 0.08308180 0.19756945 0.42051948 1600.324
## 10 phase_cleanPhase 3 0.21934709 0.21851818 1.00379330 1600.535
## 11 phase_cleanPhase 4 -0.12207191 0.23873740 -0.51132295 1600.373
## 12 phase_cleanUndefined 0.37764738 0.22816573 1.65514503 1599.719
## 13 region_AfricaYes 0.04741161 0.22748126 0.20841984 1601.841
## 14 region_N_AmericaYes -0.21361914 0.15369021 -1.38993324 1600.689
## 15 region_L_AmericaYes 0.61687622 0.18607173 3.31526032 1601.801
## 16 region_AsiaYes -0.35333982 0.15452359 -2.28663992 1600.966
## 17 region_EuropeYes -0.38776147 0.17739629 -2.18584875 1601.668
## 18 region_OceaniaYes -0.00532621 0.25061832 -0.02125228 1601.841
## 19 multicentreYes 0.01803151 0.13559170 0.13298391 1598.748
## 20 primary_purposePrevention 0.29482269 0.26031212 1.13257380 1601.401
## 21 primary_purposeTreatment -0.36392841 0.20741440 -1.75459568 1601.394
## 22 sponsor_typeInvestigator -0.18870460 0.24901537 -0.75780305 1394.104
## 23 sponsor_typeNon industry -0.60670810 0.14922794 -4.06564674 1576.597
## 24 sample_size 0.31469309 0.05168821 6.08829596 1594.497
## 25 vaccineYes -0.27690898 0.41795170 -0.66253822 1600.980
## 26 conventionalYes -0.16509395 0.34300267 -0.48131973 1600.888
## 27 traditionalYes 0.20211822 0.31753185 0.63652896 1601.455
## p.value
## 1 6.108317e-02
## 2 7.196208e-03
## 3 6.384341e-02
## 4 8.116849e-01
## 5 3.966556e-01
## 6 7.145597e-05
## 7 6.185933e-02
## 8 3.753868e-01
## 9 6.741625e-01
## 10 3.156300e-01
## 11 6.091955e-01
## 12 9.809123e-02
## 13 8.349277e-01
## 14 1.647424e-01
## 15 9.360371e-04
## 16 2.234689e-02
## 17 2.897104e-02
## 18 9.830471e-01
## 19 8.942228e-01
## 20 2.575628e-01
## 21 7.951973e-02
## 22 4.486970e-01
## 23 5.026141e-05
## 24 1.427003e-09
## 25 5.077217e-01
## 26 6.303551e-01
## 27 5.245227e-01
##
## $prospective
## term estimate std.error statistic df
## 1 (Intercept) 0.71258425 0.57509983 1.2390618 1599.8737
## 2 covidTRUE -0.14774011 0.13188767 -1.1201965 1601.0682
## 3 source_registryCT.gov -0.22647422 0.25674079 -0.8821123 1601.4474
## 4 source_registryCTRI 3.53257295 0.75030551 4.7081794 1600.8059
## 5 source_registryEUCTR -0.13371242 0.34513908 -0.3874160 1601.8257
## 6 source_registryIRCT -0.63226526 0.28485937 -2.2195698 1599.3262
## 7 source_registryJPRN -0.81686212 0.38459597 -2.1239487 1597.4909
## 8 source_registryOther -0.46194700 0.32408699 -1.4253796 1601.8025
## 9 phase_cleanPhase 2 0.27074235 0.21274668 1.2726043 1595.9858
## 10 phase_cleanPhase 3 -0.12064423 0.23648010 -0.5101665 1588.9664
## 11 phase_cleanPhase 4 0.05703499 0.25424262 0.2243329 1578.6212
## 12 phase_cleanUndefined -0.08155917 0.24380493 -0.3345263 1594.0350
## 13 region_AfricaYes 0.61239293 0.29428346 2.0809628 1601.3989
## 14 region_N_AmericaYes 0.73492863 0.19492825 3.7702520 1601.5679
## 15 region_L_AmericaYes -0.24873193 0.22524619 -1.1042670 1601.3458
## 16 region_AsiaYes 0.10622445 0.19806108 0.5363217 1600.7600
## 17 region_EuropeYes 0.30630468 0.21849226 1.4019018 1601.8218
## 18 region_OceaniaYes 2.75812481 0.73697934 3.7424724 1601.8407
## 19 multicentreYes 0.43682705 0.14492318 3.0141973 1599.9605
## 20 primary_purposePrevention -0.25774904 0.29236225 -0.8816085 1601.5769
## 21 primary_purposeTreatment -0.21431002 0.23518810 -0.9112281 1600.5932
## 22 sponsor_typeInvestigator 0.18529664 0.31758872 0.5834484 558.6916
## 23 sponsor_typeNon industry -0.16974099 0.16700650 -1.0163736 1594.9972
## 24 sample_size 0.07110458 0.05526889 1.2865208 1571.5518
## 25 vaccineYes 0.29697460 0.48326207 0.6145208 1601.6674
## 26 conventionalYes -0.23629320 0.37886094 -0.6236938 1601.7313
## 27 traditionalYes -0.25249890 0.34250672 -0.7372086 1601.8407
## p.value
## 1 2.155043e-01
## 2 2.627980e-01
## 3 3.778484e-01
## 4 2.715148e-06
## 5 6.984997e-01
## 6 2.658780e-02
## 7 3.382770e-02
## 8 1.542423e-01
## 9 2.033438e-01
## 10 6.100056e-01
## 11 8.225273e-01
## 12 7.380264e-01
## 13 3.759589e-02
## 14 1.690058e-04
## 15 2.696432e-01
## 16 5.918108e-01
## 17 1.611383e-01
## 18 1.886515e-04
## 19 2.617114e-03
## 20 3.781208e-01
## 21 3.623124e-01
## 22 5.598269e-01
## 23 3.096057e-01
## 24 1.984508e-01
## 25 5.389585e-01
## 26 5.329175e-01
## 27 4.611035e-01
# Pooled coefficient table for each outcome.
sum_main_direct_mice <- lapply(
  main_direct_models_mice,
  function(x) summary(x$pooled_fit)
)
# OR and 95% CI (Bonferroni corrected)
# The covid coefficient is selected by term name and the columns by column
# name, so the result does not depend on the order of the adjustment
# variables or of the summary columns. z is the critical value defined
# elsewhere in the file.
pool_OR_main_direct_mice <- lapply(sum_main_direct_mice, function(x) {
  covid_row <- x[x$term == "covidTRUE", ]
  log_or <- covid_row$estimate
  half_width <- z * covid_row$std.error
  matrix(
    c(
      exp(log_or),
      exp(log_or - half_width),
      exp(log_or + half_width),
      covid_row$p.value
    ),
    nrow = 1
  )
})
# one row per outcome
pool_OR_main_direct_mice <- do.call(rbind.data.frame, pool_OR_main_direct_mice)
colnames(pool_OR_main_direct_mice) <- c(
  "Estimate", "Lower CI", "Upper CI", "P-value"
)
pool_OR_main_direct_mice$Analysis <- "Main direct (4)"
fmi: fraction of information about the coefficients missing due to nonresponse
lambda: proportion of the variation attributable to the missing data
# Density plots from the imputation result, one call per imputed variable.
mice::densityplot(main_mice, ~ phase_clean)

mice::densityplot(main_mice, ~ sponsor_type)

mice::densityplot(main_mice, ~ sample_size)

Compare the complete-case analysis with the MICE analysis for each outcome
Control arm
# Complete-case logistic regression for control_arm on main_dataset
# (glm drops the rows with missing values).
summary(
  glm(
    as.formula(paste("control_arm", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = main_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("control_arm", "~", paste(main_direct_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.2300 0.1641 0.3709 0.6190 2.2914
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.34524 0.94605 -0.365 0.71517
## covidTRUE 0.64700 0.16590 3.900 9.62e-05 ***
## source_registryCT.gov -1.43398 0.47798 -3.000 0.00270 **
## source_registryCTRI -1.56766 0.52492 -2.986 0.00282 **
## source_registryEUCTR -2.35595 0.54439 -4.328 1.51e-05 ***
## source_registryIRCT -0.60915 0.55104 -1.105 0.26896
## source_registryJPRN -1.28158 0.77065 -1.663 0.09631 .
## source_registryOther -1.50499 0.54654 -2.754 0.00589 **
## phase_cleanPhase 2 0.02835 0.23797 0.119 0.90518
## phase_cleanPhase 3 0.85186 0.30478 2.795 0.00519 **
## phase_cleanPhase 4 0.38919 0.32689 1.191 0.23383
## phase_cleanUndefined -0.01534 0.29509 -0.052 0.95854
## region_AfricaYes 0.52325 0.40560 1.290 0.19703
## region_N_AmericaYes -0.59577 0.20700 -2.878 0.00400 **
## region_L_AmericaYes 0.10826 0.28337 0.382 0.70242
## region_AsiaYes -0.48605 0.20844 -2.332 0.01971 *
## region_EuropeYes -0.00945 0.25339 -0.037 0.97025
## region_OceaniaYes -0.18176 0.33384 -0.544 0.58614
## multicentreYes -0.28371 0.19159 -1.481 0.13865
## primary_purposePrevention -0.25428 0.40867 -0.622 0.53379
## primary_purposeTreatment -0.73610 0.29646 -2.483 0.01303 *
## sponsor_typeInvestigator -0.43063 0.33325 -1.292 0.19628
## sponsor_typeNon industry -0.35873 0.19983 -1.795 0.07263 .
## sample_size 0.84054 0.08373 10.038 < 2e-16 ***
## vaccineYes -0.39679 0.69881 -0.568 0.57016
## conventionalYes 0.57009 0.68070 0.838 0.40231
## traditionalYes 1.31550 0.66888 1.967 0.04922 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1497.6 on 1587 degrees of freedom
## Residual deviance: 1189.8 on 1561 degrees of freedom
## (74 observations deleted due to missingness)
## AIC: 1243.8
##
## Number of Fisher Scoring iterations: 6
# Same control_arm model fitted on each imputed dataset, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("control_arm", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# pooled estimates first, then the summary table
pooled_fit$pooled
summary(pooled_fit)
Randomisation
# Complete-case logistic regression for randomisation on main_dataset
# (glm drops the rows with missing values).
summary(
  glm(
    as.formula(paste("randomisation", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = main_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("randomisation", "~", paste(main_direct_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.8363 0.1173 0.4368 0.6938 2.3118
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.97122 0.76955 -1.262 0.206930
## covidTRUE 0.52597 0.14976 3.512 0.000445 ***
## source_registryCT.gov -0.82466 0.36508 -2.259 0.023892 *
## source_registryCTRI -0.99723 0.41228 -2.419 0.015571 *
## source_registryEUCTR -1.24284 0.43653 -2.847 0.004412 **
## source_registryIRCT -0.52090 0.41518 -1.255 0.209607
## source_registryJPRN -0.80277 0.67318 -1.193 0.233063
## source_registryOther -0.72117 0.44822 -1.609 0.107625
## phase_cleanPhase 2 0.03378 0.22034 0.153 0.878166
## phase_cleanPhase 3 0.90101 0.27411 3.287 0.001012 **
## phase_cleanPhase 4 0.67994 0.30357 2.240 0.025101 *
## phase_cleanUndefined 0.05189 0.26561 0.195 0.845098
## region_AfricaYes 0.16886 0.33918 0.498 0.618601
## region_N_AmericaYes -0.83954 0.19586 -4.286 1.82e-05 ***
## region_L_AmericaYes 0.39010 0.27209 1.434 0.151645
## region_AsiaYes -0.45841 0.19737 -2.323 0.020200 *
## region_EuropeYes -0.36571 0.23085 -1.584 0.113150
## region_OceaniaYes -0.31150 0.31586 -0.986 0.324046
## multicentreYes -0.16751 0.17477 -0.958 0.337849
## primary_purposePrevention -0.43757 0.34832 -1.256 0.209032
## primary_purposeTreatment -0.55979 0.26182 -2.138 0.032514 *
## sponsor_typeInvestigator -0.31801 0.31406 -1.013 0.311262
## sponsor_typeNon industry -0.39483 0.18461 -2.139 0.032460 *
## sample_size 0.82826 0.07714 10.737 < 2e-16 ***
## vaccineYes -1.03897 0.59606 -1.743 0.081325 .
## conventionalYes 0.22692 0.54359 0.417 0.676346
## traditionalYes 0.86484 0.51526 1.678 0.093259 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1724.2 on 1587 degrees of freedom
## Residual deviance: 1384.1 on 1561 degrees of freedom
## (74 observations deleted due to missingness)
## AIC: 1438.1
##
## Number of Fisher Scoring iterations: 5
# Same randomisation model fitted on each imputed dataset, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("randomisation", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# pooled estimates first, then the summary table
pooled_fit$pooled
summary(pooled_fit)
Blinding
# Complete-case logistic regression for blinding on main_dataset
# (glm drops the rows with missing values).
summary(
  glm(
    as.formula(paste("blinding", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = main_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("blinding", "~", paste(main_direct_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4562 -1.0479 -0.7021 1.1217 1.9151
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.985183 0.529629 -1.860 0.062866 .
## covidTRUE -0.319825 0.120826 -2.647 0.008121 **
## source_registryCT.gov 0.471870 0.261535 1.804 0.071195 .
## source_registryCTRI 0.043744 0.299956 0.146 0.884051
## source_registryEUCTR 0.278409 0.323742 0.860 0.389804
## source_registryIRCT 1.150588 0.291532 3.947 7.92e-05 ***
## source_registryJPRN 0.923499 0.552734 1.671 0.094765 .
## source_registryOther 0.367803 0.323775 1.136 0.255965
## phase_cleanPhase 2 0.141445 0.198823 0.711 0.476829
## phase_cleanPhase 3 0.253202 0.219806 1.152 0.249349
## phase_cleanPhase 4 -0.080269 0.240979 -0.333 0.739063
## phase_cleanUndefined 0.422286 0.228867 1.845 0.065020 .
## region_AfricaYes 0.028242 0.227861 0.124 0.901359
## region_N_AmericaYes -0.221598 0.153847 -1.440 0.149760
## region_L_AmericaYes 0.632431 0.187540 3.372 0.000746 ***
## region_AsiaYes -0.366039 0.154933 -2.363 0.018149 *
## region_EuropeYes -0.400955 0.178577 -2.245 0.024750 *
## region_OceaniaYes -0.007944 0.250653 -0.032 0.974716
## multicentreYes -0.014139 0.137642 -0.103 0.918185
## primary_purposePrevention 0.390406 0.264763 1.475 0.140333
## primary_purposeTreatment -0.312637 0.210117 -1.488 0.136772
## sponsor_typeInvestigator -0.137036 0.255614 -0.536 0.591888
## sponsor_typeNon industry -0.611411 0.150362 -4.066 4.78e-05 ***
## sample_size 0.315180 0.052056 6.055 1.41e-09 ***
## vaccineYes -0.256717 0.432746 -0.593 0.553029
## conventionalYes -0.228963 0.351349 -0.652 0.514615
## traditionalYes 0.169326 0.323662 0.523 0.600864
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2194.1 on 1587 degrees of freedom
## Residual deviance: 2022.7 on 1561 degrees of freedom
## (74 observations deleted due to missingness)
## AIC: 2076.7
##
## Number of Fisher Scoring iterations: 4
# Same blinding model fitted on each imputed dataset, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("blinding", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# pooled estimates first, then the summary table
pooled_fit$pooled
summary(pooled_fit)
Prospective registration
# Complete-case logistic regression for prospective on main_dataset
# (glm drops the rows with missing values).
summary(
  glm(
    as.formula(paste("prospective", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = main_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("prospective", "~", paste(main_direct_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0200 -1.0575 0.5725 0.8440 1.4896
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.75616 0.58158 1.300 0.193544
## covidTRUE -0.21664 0.13383 -1.619 0.105506
## source_registryCT.gov -0.22929 0.25765 -0.890 0.373507
## source_registryCTRI 3.47071 0.75040 4.625 3.74e-06 ***
## source_registryEUCTR -0.09829 0.34659 -0.284 0.776731
## source_registryIRCT -0.65173 0.28610 -2.278 0.022727 *
## source_registryJPRN 0.21656 0.62886 0.344 0.730570
## source_registryOther -0.53804 0.32748 -1.643 0.100390
## phase_cleanPhase 2 0.30294 0.21505 1.409 0.158922
## phase_cleanPhase 3 -0.05856 0.23852 -0.246 0.806062
## phase_cleanPhase 4 0.08626 0.25639 0.336 0.736544
## phase_cleanUndefined -0.04768 0.24434 -0.195 0.845271
## region_AfricaYes 0.58034 0.29331 1.979 0.047862 *
## region_N_AmericaYes 0.70411 0.19596 3.593 0.000327 ***
## region_L_AmericaYes -0.27907 0.22678 -1.231 0.218485
## region_AsiaYes 0.07420 0.19848 0.374 0.708521
## region_EuropeYes 0.23831 0.22081 1.079 0.280462
## region_OceaniaYes 2.79560 0.73834 3.786 0.000153 ***
## multicentreYes 0.38960 0.14842 2.625 0.008665 **
## primary_purposePrevention -0.22668 0.29761 -0.762 0.446265
## primary_purposeTreatment -0.12162 0.23794 -0.511 0.609252
## sponsor_typeInvestigator 0.29738 0.33727 0.882 0.377920
## sponsor_typeNon industry -0.17980 0.17019 -1.056 0.290776
## sample_size 0.08214 0.05590 1.469 0.141734
## vaccineYes 0.32409 0.50698 0.639 0.522649
## conventionalYes -0.34526 0.38788 -0.890 0.373402
## traditionalYes -0.32613 0.34693 -0.940 0.347192
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1891.4 on 1587 degrees of freedom
## Residual deviance: 1632.3 on 1561 degrees of freedom
## (74 observations deleted due to missingness)
## AIC: 1686.3
##
## Number of Fisher Scoring iterations: 6
# Same prospective model fitted on each imputed dataset, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("prospective", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# pooled estimates first, then the summary table
pooled_fit$pooled
summary(pooled_fit)
Indication-matched dataset
# start_year = first four characters of start_date, converted to a number
indication_dataset[["start_year"]] <- as.numeric(
  substr(indication_dataset[["start_date"]], 1, 4)
)
# imputation input: drop the trial identifier, the raw start date and the
# study arm
indication_for_mice <- select(
  indication_dataset,
  -TrialID, -start_date, -study_arm
)
# show the class of every remaining column
map(indication_for_mice, class)
## $control_arm
## [1] "factor"
##
## $randomisation
## [1] "factor"
##
## $blinding
## [1] "factor"
##
## $prospective
## [1] "factor"
##
## $source_registry
## [1] "factor"
##
## $phase_clean
## [1] "factor"
##
## $region_Africa
## [1] "factor"
##
## $region_N_America
## [1] "factor"
##
## $region_L_America
## [1] "factor"
##
## $region_Asia
## [1] "factor"
##
## $region_Europe
## [1] "factor"
##
## $region_Oceania
## [1] "factor"
##
## $multicentre
## [1] "factor"
##
## $primary_purpose
## [1] "factor"
##
## $sponsor_type
## [1] "factor"
##
## $sample_size
## [1] "numeric"
##
## $vaccine
## [1] "factor"
##
## $conventional
## [1] "factor"
##
## $traditional
## [1] "factor"
##
## $subject_blind
## [1] "factor"
##
## $caregiver_blind
## [1] "factor"
##
## $investigator_blind
## [1] "factor"
##
## $outcome_blind
## [1] "factor"
##
## $analyst_blind
## [1] "factor"
##
## $covid
## [1] "logical"
##
## $start_year
## [1] "numeric"
Use the mice command to generate an initial predictor matrix (indicating which column variables are used to predict the row variables) without making any imputations.
set.seed(5)
# Dry run (maxit = 0): sets up the default imputation model without making
# any imputations, so that its predictor matrix can be reused below.
mice_in <- mice::mice(indication_for_mice, maxit = 0, printFlag = FALSE)
# Use the full element name instead of relying on partial `$` matching.
predictor_matrix <- mice_in$predictorMatrix
# Pairwise correlations (rounded to 2 decimals) between the outcomes and the
# adjustment variables, after coercing every column with as.numeric().
correlation_matrix <- round(
  cor(
    vapply(
      indication_for_mice[, c(myvars, indication_direct_adjustment)],
      as.numeric,
      numeric(nrow(indication_for_mice))
    ),
    use = "pairwise.complete.obs"
  ),
  2
)
# keep only upper triangle
correlation_matrix[lower.tri(correlation_matrix)] <- NA
melted_correlation_matrix <- reshape2::melt(correlation_matrix)
# reverse the level order of Var2 so the y axis runs in the opposite
# direction to the x axis
melted_correlation_matrix$Var2 <- factor(
  melted_correlation_matrix$Var2,
  levels = rev(levels(melted_correlation_matrix$Var2)),
  ordered = TRUE
)
# labels_plot <- levels(melted_correlation_matrix[,1])
# labels_plot <- labels[levels(melted_correlation_matrix[,1]),]$short
# axis labels: variable names with underscores removed, named by the
# original level so the scales can match them to their breaks
labels_plot <- vapply(
  levels(melted_correlation_matrix[, 1]),
  function(x) gsub("_", "", x),
  character(1)
)
ggplot(
  data = melted_correlation_matrix,
  aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile(color = "white") +
  xlab("") +
  ylab("") +
  scale_fill_gradient2(
    low = "darkorchid4",
    high = "green3",
    mid = "white",
    midpoint = 0,
    na.value = "white",
    limits = c(-1, 1),
    name = "Correlation"
  ) +
  scale_x_discrete(labels = labels_plot) +
  scale_y_discrete(labels = rev(labels_plot)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    # panel.background = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.7, 0.7),
    legend.direction = "horizontal"
  ) +
  guides(
    fill = guide_colorbar(
      barwidth = 7,
      barheight = 1,
      title.position = "top",
      title.hjust = 0.5
    )
  )

# quickpred: quick selection procedure of predictors
# select predictors according to data relations with a minimum correlation of ρ = 0.25
# (argument and element names written out in full rather than partially matched)
mice_in2 <- mice::mice(
  indication_for_mice,
  predictorMatrix = mice::quickpred(indication_for_mice, mincor = 0.25),
  printFlag = FALSE
)
# display the predictor matrix that was actually used
mice_in2$predictorMatrix
Modify the predictor matrix such that only phase, sample size and sponsor type are imputed, using all variables.
# Switch on `covid` as a predictor in every row. The 'covid' row itself needs no
# special handling because it is zeroed together with all other non-target rows below.
predictor_matrix[, "covid"] <- 1
# Zero every row except those of the three variables that are to be imputed.
predictor_matrix[
  setdiff(rownames(predictor_matrix), c("phase_clean", "sample_size", "sponsor_type")),
] <- 0
Generate 10 imputed datasets using chained equations (using package mice).
# Draw m = 10 imputed datasets with the edited predictor matrix.
# `predictorMatrix` and `printFlag` are written out in full instead of relying
# on partial argument matching (`pred`, `print`).
indication_mice <- mice::mice(
  indication_for_mice,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 150
# list the components and class of the returned object
attributes(indication_mice)
## $names
## [1] "data" "imp" "m" "where"
## [5] "blocks" "call" "nmis" "method"
## [9] "predictorMatrix" "visitSequence" "formulas" "post"
## [13] "blots" "ignore" "seed" "iteration"
## [17] "lastSeedValue" "chainMean" "chainVar" "loggedEvents"
## [21] "version" "date"
##
## $class
## [1] "mids"
Original data:
# first rows of the data as passed to mice (before imputation)
head(indication_mice$data)
Imputed datasets:
# first imputed values per variable; one column per imputation
map(indication_mice$imp, head)
## $control_arm
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $randomisation
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $blinding
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $prospective
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $source_registry
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $phase_clean
## 1 2 3 4 5 6 7 8
## 233 Phase 2 Phase 2 Phase 2 Phase 4 Phase 3 Phase 3 Phase 3 Phase 2
## 234 Phase 2 Phase 4 Phase 2 Phase 2 Phase 2 Phase 2 Phase 2 Phase 2
## 235 Phase 3 Phase 2 Phase 2 Phase 4 Phase 2 Phase 2 Phase 3 Phase 4
## 663 Phase 2 Phase 3 Phase 4 Phase 3 Phase 4 Phase 4 Phase 2 Phase 3
## 666 Phase 3 Phase 4 Phase 3 Undefined Phase 3 Phase 4 Phase 2 Phase 3
## 686 Undefined Undefined Phase 4 Phase 3 Phase 3 Undefined Phase 3 Phase 3
## 9 10
## 233 Phase 2 Phase 2
## 234 Phase 2 Phase 1
## 235 Phase 3 Phase 3
## 663 Phase 2 Phase 3
## 666 Undefined Phase 4
## 686 Phase 2 Phase 2
##
## $region_Africa
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_N_America
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_L_America
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Asia
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Europe
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Oceania
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $multicentre
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $primary_purpose
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $sponsor_type
## 1 2 3 4 5
## 121 Non industry Non industry Non industry Non industry Non industry
## 179 Industry Non industry Industry Industry Non industry
## 224 Non industry Industry Non industry Industry Non industry
## 686 Investigator Industry Non industry Industry Non industry
## 689 Industry Non industry Non industry Non industry Industry
## 690 Non industry Industry Non industry Non industry Industry
## 6 7 8 9 10
## 121 Non industry Non industry Non industry Non industry Non industry
## 179 Non industry Industry Non industry Non industry Non industry
## 224 Non industry Investigator Non industry Industry Investigator
## 686 Non industry Non industry Industry Non industry Industry
## 689 Non industry Non industry Non industry Industry Industry
## 690 Non industry Non industry Industry Industry Industry
##
## $sample_size
## 1 2 3 4 5 6 7 8
## 350 4.094345 3.688879 6.719013 5.598422 6.214608 5.634790 6.784457 6.551080
## 679 4.787492 2.995732 4.634729 3.583519 4.007333 3.401197 4.905275 3.091042
## 1640 4.007333 3.583519 4.564348 3.688879 4.094345 4.828314 3.688879 2.995732
## 1642 4.787492 5.298317 6.907755 4.382027 4.094345 4.605170 3.637586 4.867534
## 9 10
## 350 5.703782 5.192957
## 679 3.688879 4.094345
## 1640 2.708050 4.158883
## 1642 4.941642 5.493061
##
## $vaccine
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $conventional
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $traditional
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $subject_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No Yes No No No Yes Yes No Yes Yes
## 3 No No Yes No No No Yes Yes Yes No
## 5 Yes No No No No No Yes Yes No No
## 6 No Yes Yes No No No Yes No Yes Yes
## 7 No No No No No Yes No No No No
## 8 No No No No No No Yes No Yes No
##
## $caregiver_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No No No Yes No No Yes No No
## 3 No No No No No No No Yes No No
## 5 Yes No No No No No No No No No
## 6 No No No No No No Yes No No No
## 7 No Yes Yes No No No No No No No
## 8 Yes No No Yes Yes No No No No No
##
## $investigator_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No Yes No No Yes No No Yes Yes Yes
## 3 No No Yes No Yes Yes No No Yes No
## 5 No No Yes No No Yes No No Yes Yes
## 6 No No No Yes No Yes No No No Yes
## 7 Yes No No No Yes No No No No No
## 8 No No No No Yes Yes No No Yes No
##
## $outcome_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No No Yes No Yes No Yes Yes No
## 3 No No No No No No No No No No
## 5 No No Yes No No No No No Yes No
## 6 No Yes No No No No No No No No
## 7 No No Yes Yes No No No No Yes No
## 8 No No No No No No No No No No
##
## $analyst_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No No No No No No No No No
## 3 No No No No No No No No No No
## 5 No No No No No No No No No No
## 6 No No No No No No No No No No
## 7 No No No No No No No No No No
## 8 No No No No No No No No No No
##
## $covid
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $start_year
## 1 2 3 4 5 6 7 8 9 10
## 32 2020 2020 2019 2018 2020 2020 2020 2020 2016 2020
## 36 2012 2016 2017 2020 2020 2016 2020 2018 2020 2019
## 41 2020 2016 2019 2019 2020 2019 2020 2016 2016 2020
## 63 2020 2016 2017 2019 2017 2020 2020 2020 2020 2019
## 66 2012 2020 2016 2018 2020 2020 2020 2020 2016 2020
## 72 2020 2016 2017 2016 2020 2019 2020 2020 2020 2020
# In order to get the third imputed data set, use the complete() function
# c3 <- complete(imp, 3)
# md.pattern(c3)
# c.long <- complete(imp, "long") # "broad"
Only impute the three variables listed above.
# Start from the imputation methods chosen by the previous mice run.
method_vector <- indication_mice$method
# An empty method string tells mice not to impute that variable. Blank every
# entry except the three target variables, selecting them by name so the result
# does not depend on the column order of indication_for_mice (the earlier
# version blanked positions 19 onward by index).
method_vector[!(names(method_vector) %in% c("phase_clean", "sample_size", "sponsor_type"))] <- ""
# methods(mice)
Generate 10 imputed datasets using the updated method vector.
# Re-run the imputation with the restricted method vector (m = 10 datasets).
# `predictorMatrix` and `printFlag` are written out in full instead of relying
# on partial argument matching (`pred`, `print`).
indication_mice <- mice::mice(
  indication_for_mice,
  method = method_vector,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 150
# trace plots of the imputation chains
plot(indication_mice)

Check that there is no trend with further iterations and that the lines mix.
# Continue the existing chains for 40 more iterations to inspect convergence.
# `printFlag` is written out in full instead of the partially matched `print`.
indication_mice_40 <- mice::mice.mids(indication_mice, maxit = 40, printFlag = FALSE)
plot(indication_mice_40)

Plot of observed (blue) and imputed (red) phase:
# Strip plot of phase_clean, one panel column per imputation number (.imp).
mice::stripplot(indication_mice, phase_clean ~ .imp, pch = 20, cex = 2)

# Under MCAR, univariate distributions of the observed and imputed data are expected to be identical. Under MAR, they can be different, both in location and spread, but their multivariate distribution is assumed to be identical.
# Default strip plot of the imputed object (no formula given).
mice::stripplot(indication_mice)

Analysis
Logistic regression analysis on the multiply imputed data.
# For every outcome in myvars: fit the direct-effect logistic model in each
# imputed dataset and pool the fits. Each list element keeps both the
# per-imputation fits ("fit") and the pooled result ("pooled_fit").
indication_direct_models_mice <- setNames(
  lapply(myvars, function(x) {
    fit <- with(
      indication_mice,
      glm(
        as.formula(paste(x, "~", paste(indication_direct_adjustment, collapse = "+"))),
        family = binomial(link = "logit")
      )
    )
    list("fit" = fit, "pooled_fit" = mice::pool(fit))
  }),
  myvars
)
# print the pooled coefficient table of every outcome
lapply(indication_direct_models_mice, function(model) summary(model$pooled_fit))
## $control_arm
## term estimate std.error statistic df
## 1 (Intercept) -0.44457936 0.68472653 -0.64928017 1593.064
## 2 covidTRUE -0.79312207 0.19915981 -3.98233990 1600.216
## 3 source_registryCT.gov -1.30017243 0.49562668 -2.62328981 1600.137
## 4 source_registryCTRI -0.95435273 0.56883548 -1.67773067 1600.462
## 5 source_registryEUCTR -2.23460818 0.58194564 -3.83989158 1599.909
## 6 source_registryIRCT -0.59170126 0.56433545 -1.04849209 1599.866
## 7 source_registryJPRN -3.20584104 0.60645751 -5.28617588 1599.188
## 8 source_registryOther -1.43936808 0.60248929 -2.38903515 1599.633
## 9 phase_cleanPhase 2 0.87141484 0.27313916 3.19036946 1596.853
## 10 phase_cleanPhase 3 0.90214324 0.32042593 2.81545016 1573.425
## 11 phase_cleanPhase 4 0.16034556 0.30814163 0.52036320 1578.764
## 12 phase_cleanUndefined -0.11067291 0.31174235 -0.35501405 1599.644
## 13 region_AfricaYes 0.64783322 0.49408590 1.31117529 1600.414
## 14 region_N_AmericaYes -0.17891972 0.25403821 -0.70430242 1597.006
## 15 region_L_AmericaYes -0.04464200 0.33394166 -0.13368202 1600.073
## 16 region_AsiaYes -0.10862529 0.26691616 -0.40696406 1596.655
## 17 region_EuropeYes -0.01506037 0.28234915 -0.05333953 1599.029
## 18 region_OceaniaYes 0.32854311 0.51084523 0.64313630 1597.107
## 19 multicentreYes 0.04911306 0.20974009 0.23416151 1598.487
## 20 primary_purposePrevention 0.25450023 0.31394237 0.81065907 1599.662
## 21 primary_purposeTreatment 0.88079991 0.27722032 3.17725593 1599.821
## 22 sponsor_typeInvestigator 0.04286251 0.40943407 0.10468722 1597.602
## 23 sponsor_typeNon industry 0.13661701 0.22851966 0.59783484 1438.044
## 24 sample_size 0.69979352 0.08588239 8.14827716 1520.618
## p.value
## 1 5.162509e-01
## 2 7.129996e-05
## 3 8.791258e-03
## 4 9.359494e-02
## 5 1.278882e-04
## 6 2.945703e-01
## 7 1.421266e-07
## 8 1.700797e-02
## 9 1.448518e-03
## 10 4.931611e-03
## 11 6.028834e-01
## 12 7.226259e-01
## 13 1.899866e-01
## 14 4.813471e-01
## 15 8.936708e-01
## 16 6.840889e-01
## 17 9.574680e-01
## 18 5.202280e-01
## 19 8.148896e-01
## 20 4.176821e-01
## 21 1.515154e-03
## 22 9.166371e-01
## 23 5.500443e-01
## 24 8.881784e-16
##
## $randomisation
## term estimate std.error statistic df
## 1 (Intercept) -1.194287510 0.55858293 -2.138066620 1594.421
## 2 covidTRUE -0.769149836 0.17009034 -4.522007797 1600.539
## 3 source_registryCT.gov -0.612548428 0.36375194 -1.683972966 1600.102
## 4 source_registryCTRI -0.201444360 0.44380492 -0.453902946 1600.296
## 5 source_registryEUCTR -0.952540575 0.45472681 -2.094753503 1600.011
## 6 source_registryIRCT -0.309759773 0.40564902 -0.763615240 1599.326
## 7 source_registryJPRN -2.327311916 0.49749370 -4.678073107 1597.755
## 8 source_registryOther -0.696934384 0.47698442 -1.461126102 1599.603
## 9 phase_cleanPhase 2 0.795970854 0.23952133 3.323173156 1589.318
## 10 phase_cleanPhase 3 0.848847462 0.27743937 3.059578194 1589.395
## 11 phase_cleanPhase 4 0.074697604 0.26496734 0.281912494 1599.551
## 12 phase_cleanUndefined -0.074763369 0.27447340 -0.272388398 1595.511
## 13 region_AfricaYes 0.828715576 0.44913021 1.845156624 1600.538
## 14 region_N_AmericaYes -0.533889178 0.22546417 -2.367955774 1597.564
## 15 region_L_AmericaYes 0.080502488 0.30388967 0.264906960 1600.179
## 16 region_AsiaYes -0.122130262 0.23962348 -0.509675691 1597.840
## 17 region_EuropeYes -0.269747792 0.24706545 -1.091807015 1599.148
## 18 region_OceaniaYes 0.364121351 0.45404099 0.801956997 1598.626
## 19 multicentreYes 0.290782656 0.17995588 1.615855258 1599.485
## 20 primary_purposePrevention 0.068676912 0.27026777 0.254106928 1599.662
## 21 primary_purposeTreatment 1.019558885 0.24326225 4.191192345 1600.299
## 22 sponsor_typeInvestigator 0.001789449 0.36841923 0.004857101 1598.718
## 23 sponsor_typeNon industry -0.051288989 0.20101615 -0.255148603 1511.554
## 24 sample_size 0.623848183 0.07300873 8.544843231 1561.302
## p.value
## 1 3.266299e-02
## 2 6.576294e-06
## 3 9.238193e-02
## 4 6.499602e-01
## 5 3.634983e-02
## 6 4.452092e-01
## 7 3.139994e-06
## 8 1.441772e-01
## 9 9.102204e-04
## 10 2.253554e-03
## 11 7.780471e-01
## 12 7.853586e-01
## 13 6.519939e-02
## 14 1.800517e-02
## 15 7.911153e-01
## 16 6.103491e-01
## 17 2.750824e-01
## 18 4.226970e-01
## 19 1.063228e-01
## 20 7.994456e-01
## 21 2.926566e-05
## 22 9.961252e-01
## 23 7.986430e-01
## 24 0.000000e+00
##
## $blinding
## term estimate std.error statistic df
## 1 (Intercept) -1.58908581 0.43696132 -3.63667384 1598.302
## 2 covidTRUE -0.98732558 0.12412289 -7.95441959 1600.368
## 3 source_registryCT.gov 0.80313242 0.28597426 2.80840806 1600.766
## 4 source_registryCTRI 0.03450912 0.34628062 0.09965652 1600.668
## 5 source_registryEUCTR 0.25549607 0.34734831 0.73556158 1600.685
## 6 source_registryIRCT 1.10485101 0.31127316 3.54945797 1600.668
## 7 source_registryJPRN -0.21503829 0.44020449 -0.48849635 1597.068
## 8 source_registryOther -0.02627937 0.36488703 -0.07202055 1600.663
## 9 phase_cleanPhase 2 0.48806949 0.20823474 2.34384275 1599.830
## 10 phase_cleanPhase 3 0.11690108 0.22340582 0.52326785 1599.955
## 11 phase_cleanPhase 4 -0.47795748 0.23511757 -2.03284457 1599.331
## 12 phase_cleanUndefined -0.05468179 0.24372521 -0.22435837 1598.526
## 13 region_AfricaYes 0.36251480 0.23379322 1.55057877 1600.810
## 14 region_N_AmericaYes -0.22888238 0.15811427 -1.44757577 1599.638
## 15 region_L_AmericaYes 0.54534625 0.20464557 2.66483289 1600.416
## 16 region_AsiaYes -0.12031202 0.16603890 -0.72460140 1600.312
## 17 region_EuropeYes -0.17056651 0.17294261 -0.98626078 1600.425
## 18 region_OceaniaYes 0.58358419 0.31842434 1.83272484 1600.589
## 19 multicentreYes 0.18776497 0.13095073 1.43385971 1600.816
## 20 primary_purposePrevention 0.51969705 0.24466619 2.12410657 1600.671
## 21 primary_purposeTreatment 0.40616298 0.22209853 1.82875128 1600.652
## 22 sponsor_typeInvestigator -0.02396366 0.27477845 -0.08721083 1586.386
## 23 sponsor_typeNon industry -0.59022606 0.15161707 -3.89287341 1582.725
## 24 sample_size 0.31587109 0.04842993 6.52222955 1597.516
## p.value
## 1 2.849435e-04
## 2 3.330669e-15
## 3 5.039155e-03
## 4 9.206295e-01
## 5 4.621053e-01
## 6 3.971802e-04
## 7 6.252654e-01
## 8 9.425946e-01
## 9 1.920804e-02
## 10 6.008603e-01
## 11 4.223311e-02
## 12 8.225071e-01
## 13 1.212003e-01
## 14 1.479317e-01
## 15 7.780068e-03
## 16 4.688025e-01
## 17 3.241542e-01
## 18 6.702917e-02
## 19 1.518076e-01
## 20 3.381418e-02
## 21 6.762287e-02
## 22 9.305149e-01
## 23 1.031798e-04
## 24 9.269185e-11
##
## $prospective
## term estimate std.error statistic df
## 1 (Intercept) -0.75517256 0.42772090 -1.76557320 1593.218
## 2 covidTRUE 0.06653992 0.12718724 0.52316505 1599.975
## 3 source_registryCT.gov 0.15562652 0.25960741 0.59946872 1600.019
## 4 source_registryCTRI 2.29377635 0.45895856 4.99778528 1600.153
## 5 source_registryEUCTR 0.25425402 0.33623656 0.75617602 1600.343
## 6 source_registryIRCT -0.83187679 0.28893317 -2.87913219 1595.844
## 7 source_registryJPRN 0.07333326 0.41435204 0.17698299 1599.649
## 8 source_registryOther 0.03287726 0.34924311 0.09413861 1600.272
## 9 phase_cleanPhase 2 0.33163662 0.21392262 1.55026434 1593.456
## 10 phase_cleanPhase 3 0.07682503 0.22936019 0.33495363 1572.113
## 11 phase_cleanPhase 4 -0.03802994 0.23166063 -0.16416227 1596.806
## 12 phase_cleanUndefined -0.17161955 0.24408737 -0.70310703 1587.019
## 13 region_AfricaYes 0.54178023 0.26407030 2.05165150 1600.807
## 14 region_N_AmericaYes 0.65450271 0.17793035 3.67842093 1600.748
## 15 region_L_AmericaYes -0.35420678 0.21123628 -1.67682738 1600.746
## 16 region_AsiaYes 0.01217093 0.17815220 0.06831761 1600.481
## 17 region_EuropeYes 0.23756270 0.19050099 1.24704184 1600.532
## 18 region_OceaniaYes 1.25152681 0.40255741 3.10893995 1600.789
## 19 multicentreYes 0.46159292 0.13647021 3.38237137 1600.695
## 20 primary_purposePrevention -0.28986281 0.24512881 -1.18249185 1600.208
## 21 primary_purposeTreatment 0.12275457 0.22142511 0.55438412 1600.334
## 22 sponsor_typeInvestigator 0.27514737 0.30685720 0.89666261 1594.032
## 23 sponsor_typeNon industry 0.11781716 0.15833678 0.74409217 1592.379
## 24 sample_size 0.14095973 0.04889687 2.88279635 1572.067
## p.value
## 1 7.765900e-02
## 2 6.009318e-01
## 3 5.489452e-01
## 4 6.435482e-07
## 5 4.496550e-01
## 6 4.040868e-03
## 7 8.595442e-01
## 8 9.250108e-01
## 9 1.212767e-01
## 10 7.377047e-01
## 11 8.696242e-01
## 12 4.820922e-01
## 13 4.036593e-02
## 14 2.424459e-04
## 15 9.377149e-02
## 16 9.455413e-01
## 17 2.125646e-01
## 18 1.910668e-03
## 19 7.359496e-04
## 20 2.371863e-01
## 21 5.793934e-01
## 22 3.700344e-01
## 23 4.569305e-01
## 24 3.995209e-03
# Pooled coefficient tables, one per outcome.
sum_indication_direct_mice <- lapply(indication_direct_models_mice, function(x) summary(x$pooled_fit))
# OR and 95% CI (Bonferroni corrected)
# For each outcome, take the covid coefficient and return one row:
# exp(estimate), exp(estimate -/+ z * std.error), p-value.
# The row and columns are selected by name rather than by position
# (previously x[2,2], x[2,3], x[2,6]), so the result does not depend on the
# term order or the column layout of the summary table.
# `z` is the critical value defined earlier in the script.
pool_OR_indication_direct_mice <- lapply(sum_indication_direct_mice, function(x) {
  covid_row <- which(x$term == "covidTRUE")
  # fail loudly if the covid term is missing or duplicated
  stopifnot(length(covid_row) == 1L)
  cbind(
    exp(cbind(
      x[covid_row, "estimate"],
      (x[covid_row, "estimate"] - z * (x[covid_row, "std.error"])),
      (x[covid_row, "estimate"] + z * (x[covid_row, "std.error"]))
    )),
    x[covid_row, "p.value"]
  )
})
# stack the per-outcome rows into one data frame and tag the analysis
pool_OR_indication_direct_mice <- do.call(rbind.data.frame, pool_OR_indication_direct_mice)
pool_OR_indication_direct_mice$Analysis <- "IM direct (5)"
# sjPlot::tab_model(indication_direct_models_mice)
# sjPlot::plot_models(indication_direct_models_mice, prefix.labels = "varname") + theme_bw()
fmi: fraction of information about the coefficients missing due to nonresponse
lambda: proportion of the variation attributable to the missing data
# Density plots of the imputed object, one per variable that was imputed:
# phase, sponsor type and sample size.
mice::densityplot(indication_mice, ~ phase_clean)

mice::densityplot(indication_mice, ~ sponsor_type)

mice::densityplot(indication_mice, ~ sample_size)

Compare the complete case analysis with the MICE analysis for each outcome
Control arm
# Complete-case logistic model for control_arm on the un-imputed data; glm drops
# rows with missing values (see "observations deleted due to missingness" in the output).
summary(glm(as.formula(paste("control_arm", "~", paste(indication_direct_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset))
##
## Call:
## glm(formula = as.formula(paste("control_arm", "~", paste(indication_direct_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.8768 0.1994 0.3271 0.4872 2.0094
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.40142 0.68659 -0.585 0.558774
## covidTRUE -0.78471 0.20291 -3.867 0.000110 ***
## source_registryCT.gov -1.30534 0.49569 -2.633 0.008455 **
## source_registryCTRI -0.98018 0.57007 -1.719 0.085540 .
## source_registryEUCTR -2.23270 0.58280 -3.831 0.000128 ***
## source_registryIRCT -0.57126 0.56479 -1.011 0.311803
## source_registryJPRN -2.97240 0.65426 -4.543 5.54e-06 ***
## source_registryOther -1.32440 0.62151 -2.131 0.033095 *
## phase_cleanPhase 2 0.87639 0.27466 3.191 0.001419 **
## phase_cleanPhase 3 0.86491 0.32103 2.694 0.007056 **
## phase_cleanPhase 4 0.15861 0.30894 0.513 0.607674
## phase_cleanUndefined -0.07839 0.31609 -0.248 0.804143
## region_AfricaYes 0.61576 0.49438 1.246 0.212940
## region_N_AmericaYes -0.22326 0.25664 -0.870 0.384330
## region_L_AmericaYes 0.06006 0.34796 0.173 0.862954
## region_AsiaYes -0.14831 0.26834 -0.553 0.580480
## region_EuropeYes -0.07272 0.28773 -0.253 0.800469
## region_OceaniaYes 0.22712 0.51379 0.442 0.658453
## multicentreYes 0.15636 0.21642 0.722 0.469994
## primary_purposePrevention 0.28722 0.31873 0.901 0.367510
## primary_purposeTreatment 0.85701 0.28053 3.055 0.002251 **
## sponsor_typeInvestigator 0.05360 0.40996 0.131 0.895986
## sponsor_typeNon industry 0.15374 0.22947 0.670 0.502866
## sample_size 0.68906 0.08597 8.015 1.10e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1144.33 on 1609 degrees of freedom
## Residual deviance: 946.04 on 1586 degrees of freedom
## (47 observations deleted due to missingness)
## AIC: 994.04
##
## Number of Fisher Scoring iterations: 6
# Same model for control_arm, fitted in every imputed dataset and then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(paste("control_arm", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# per-term pooled table, followed by the coefficient summary
pooled_fit$pooled
summary(pooled_fit)
Randomisation
# Complete-case logistic model for randomisation on the un-imputed data; glm drops
# rows with missing values (see "observations deleted due to missingness" in the output).
summary(glm(as.formula(paste("randomisation", "~", paste(indication_direct_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset))
##
## Call:
## glm(formula = as.formula(paste("randomisation", "~", paste(indication_direct_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.4006 0.2319 0.4020 0.5920 2.2030
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.163344 0.560294 -2.076 0.037865 *
## covidTRUE -0.721661 0.172022 -4.195 2.73e-05 ***
## source_registryCT.gov -0.631524 0.363234 -1.739 0.082102 .
## source_registryCTRI -0.239598 0.444577 -0.539 0.589932
## source_registryEUCTR -0.995336 0.455771 -2.184 0.028973 *
## source_registryIRCT -0.299489 0.405439 -0.739 0.460102
## source_registryJPRN -2.135571 0.541098 -3.947 7.92e-05 ***
## source_registryOther -0.414378 0.501303 -0.827 0.408463
## phase_cleanPhase 2 0.810087 0.240263 3.372 0.000747 ***
## phase_cleanPhase 3 0.828245 0.278058 2.979 0.002895 **
## phase_cleanPhase 4 0.075342 0.265730 0.284 0.776771
## phase_cleanUndefined -0.030325 0.277135 -0.109 0.912866
## region_AfricaYes 0.799657 0.448957 1.781 0.074889 .
## region_N_AmericaYes -0.536686 0.227304 -2.361 0.018221 *
## region_L_AmericaYes 0.120126 0.311840 0.385 0.700077
## region_AsiaYes -0.142274 0.240638 -0.591 0.554361
## region_EuropeYes -0.240252 0.252141 -0.953 0.340666
## region_OceaniaYes 0.198177 0.455835 0.435 0.663741
## multicentreYes 0.345967 0.184246 1.878 0.060417 .
## primary_purposePrevention 0.081070 0.273621 0.296 0.767012
## primary_purposeTreatment 0.965373 0.245692 3.929 8.52e-05 ***
## sponsor_typeInvestigator -0.002579 0.368211 -0.007 0.994411
## sponsor_typeNon industry -0.041474 0.201824 -0.205 0.837186
## sample_size 0.614657 0.073154 8.402 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1459.4 on 1609 degrees of freedom
## Residual deviance: 1211.8 on 1586 degrees of freedom
## (47 observations deleted due to missingness)
## AIC: 1259.8
##
## Number of Fisher Scoring iterations: 5
# Same model for randomisation, fitted in every imputed dataset and then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(paste("randomisation", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# per-term pooled table, followed by the coefficient summary
pooled_fit$pooled
summary(pooled_fit)
Blinding
# Complete-case logistic model for blinding on the un-imputed data; glm drops
# rows with missing values (see "observations deleted due to missingness" in the output).
summary(glm(as.formula(paste("blinding", "~", paste(indication_direct_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset))
##
## Call:
## glm(formula = as.formula(paste("blinding", "~", paste(indication_direct_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2384 -1.0519 0.5217 0.9879 2.1342
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.55925 0.43688 -3.569 0.000358 ***
## covidTRUE -0.96597 0.12477 -7.742 9.76e-15 ***
## source_registryCT.gov 0.79869 0.28583 2.794 0.005202 **
## source_registryCTRI 0.05285 0.34712 0.152 0.878997
## source_registryEUCTR 0.26120 0.34740 0.752 0.452128
## source_registryIRCT 1.10526 0.31118 3.552 0.000383 ***
## source_registryJPRN 0.31122 0.47865 0.650 0.515565
## source_registryOther 0.04078 0.36688 0.111 0.911494
## phase_cleanPhase 2 0.49032 0.20826 2.354 0.018554 *
## phase_cleanPhase 3 0.11831 0.22348 0.529 0.596527
## phase_cleanPhase 4 -0.46494 0.23531 -1.976 0.048172 *
## phase_cleanUndefined -0.05946 0.24425 -0.243 0.807653
## region_AfricaYes 0.35186 0.23350 1.507 0.131827
## region_N_AmericaYes -0.24667 0.15829 -1.558 0.119154
## region_L_AmericaYes 0.55333 0.20587 2.688 0.007193 **
## region_AsiaYes -0.13132 0.16598 -0.791 0.428836
## region_EuropeYes -0.18028 0.17365 -1.038 0.299178
## region_OceaniaYes 0.53145 0.31786 1.672 0.094533 .
## multicentreYes 0.22378 0.13191 1.697 0.089791 .
## primary_purposePrevention 0.51475 0.24562 2.096 0.036108 *
## primary_purposeTreatment 0.38330 0.22270 1.721 0.085217 .
## sponsor_typeInvestigator -0.01946 0.27477 -0.071 0.943552
## sponsor_typeNon industry -0.58791 0.15207 -3.866 0.000111 ***
## sample_size 0.31023 0.04849 6.397 1.58e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2222.1 on 1609 degrees of freedom
## Residual deviance: 1936.2 on 1586 degrees of freedom
## (47 observations deleted due to missingness)
## AIC: 1984.2
##
## Number of Fisher Scoring iterations: 4
# Same model for blinding, fitted in every imputed dataset and then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(paste("blinding", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# per-term pooled table, followed by the coefficient summary
pooled_fit$pooled
summary(pooled_fit)
Prospective registration
# Complete-case logistic model for prospective registration on the un-imputed data; glm drops
# rows with missing values (see "observations deleted due to missingness" in the output).
summary(glm(as.formula(paste("prospective", "~", paste(indication_direct_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset))
##
## Call:
## glm(formula = as.formula(paste("prospective", "~", paste(indication_direct_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4501 -1.0823 0.6158 0.8673 1.6632
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.78420 0.42822 -1.831 0.067054 .
## covidTRUE 0.01711 0.12844 0.133 0.894015
## source_registryCT.gov 0.15752 0.25972 0.606 0.544191
## source_registryCTRI 2.28225 0.45985 4.963 6.94e-07 ***
## source_registryEUCTR 0.29496 0.33695 0.875 0.381364
## source_registryIRCT -0.83231 0.28898 -2.880 0.003975 **
## source_registryJPRN -0.16557 0.45385 -0.365 0.715246
## source_registryOther -0.05469 0.35306 -0.155 0.876896
## phase_cleanPhase 2 0.31075 0.21410 1.451 0.146653
## phase_cleanPhase 3 0.07361 0.22925 0.321 0.748161
## phase_cleanPhase 4 -0.06518 0.23208 -0.281 0.778819
## phase_cleanUndefined -0.18603 0.24509 -0.759 0.447841
## region_AfricaYes 0.53065 0.26437 2.007 0.044724 *
## region_N_AmericaYes 0.64307 0.17803 3.612 0.000304 ***
## region_L_AmericaYes -0.34954 0.21266 -1.644 0.100246
## region_AsiaYes -0.00513 0.17851 -0.029 0.977073
## region_EuropeYes 0.19764 0.19096 1.035 0.300689
## region_OceaniaYes 1.28801 0.40544 3.177 0.001489 **
## multicentreYes 0.45061 0.13767 3.273 0.001064 **
## primary_purposePrevention -0.28508 0.24671 -1.156 0.247860
## primary_purposeTreatment 0.17350 0.22303 0.778 0.436608
## sponsor_typeInvestigator 0.27612 0.30738 0.898 0.369017
## sponsor_typeNon industry 0.11510 0.15904 0.724 0.469269
## sample_size 0.15266 0.04908 3.111 0.001867 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2033.0 on 1609 degrees of freedom
## Residual deviance: 1801.7 on 1586 degrees of freedom
## (47 observations deleted due to missingness)
## AIC: 1849.7
##
## Number of Fisher Scoring iterations: 5
# Same model for prospective registration, fitted in every imputed dataset and then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(paste("prospective", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# per-term pooled table, followed by the coefficient summary
pooled_fit$pooled
summary(pooled_fit)
Total effect (analyses 6 and 7)
We repeat the main analysis adjusting only for confounding variables.
Complete case analysis
Main dataset
# Adjustment set for the total-effect models; shared by the main and the
# indication-matched dataset.
main_total_adjustment <- c("covid", "sponsor_type")
indication_total_adjustment <- main_total_adjustment
# One logistic regression per outcome in myvars, fitted on the main dataset.
# List elements are named "<outcome>_(total)".
main_total_models <- setNames(
  lapply(myvars, function(x) {
    glm(
      as.formula(paste(x, "~", paste(main_total_adjustment, collapse = "+"))),
      family = binomial(link = "logit"),
      data = main_dataset
    )
  }),
  paste0(myvars, "_(total)")
)
# lapply(main_total_models, summary)
# side-by-side regression table for all outcomes
sjPlot::tab_model(main_total_models)
|
|
Control arm
|
randomisation
|
Blinding
|
Prospective registration
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
2.88
|
2.31 – 3.62
|
<0.001
|
2.24
|
1.82 – 2.77
|
<0.001
|
1.18
|
0.98 – 1.43
|
0.085
|
4.94
|
3.90 – 6.33
|
<0.001
|
|
covidTRUE
|
2.19
|
1.68 – 2.88
|
<0.001
|
1.93
|
1.52 – 2.45
|
<0.001
|
0.98
|
0.80 – 1.20
|
0.828
|
0.79
|
0.63 – 0.99
|
0.040
|
Sponsor type: Investigator
|
0.82
|
0.48 – 1.43
|
0.460
|
0.88
|
0.54 – 1.48
|
0.620
|
0.82
|
0.53 – 1.28
|
0.385
|
1.22
|
0.69 – 2.29
|
0.515
|
Sponsor type: Non industry
|
1.18
|
0.90 – 1.56
|
0.233
|
1.10
|
0.85 – 1.41
|
0.461
|
0.63
|
0.50 – 0.78
|
<0.001
|
0.44
|
0.33 – 0.57
|
<0.001
|
|
Observations
|
1608
|
1608
|
1608
|
1608
|
|
R2 Tjur
|
0.025
|
0.021
|
0.012
|
0.036
|
# plot of all total-effect models (main dataset) with a reference line at 1
sjPlot::plot_models(main_total_models, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

# library(sjmisc)
Indication-matched dataset
We repeat the same analyses on the indication-matched dataset.
# One total-effect logistic regression per outcome in myvars, fitted on the
# indication-matched dataset. List elements are named "<outcome>_(total_indication)".
indication_total_models <- setNames(
  lapply(myvars, function(x) {
    glm(
      as.formula(paste(x, "~", paste(indication_total_adjustment, collapse = "+"))),
      family = binomial(link = "logit"),
      data = indication_dataset
    )
  }),
  paste0(myvars, "_(total_indication)")
)
# lapply(indication_total_models, summary)
# lapply(indication_total_models, function(x) knitr::kable(exp(confint(x)), digits = 2))
# lapply(indication_total_models, function(x) print(sjPlot::tab_model(x)))
# side-by-side regression table for all outcomes
sjPlot::tab_model(indication_total_models)
|
|
Control arm
|
randomisation
|
Blinding
|
Prospective registration
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
8.31
|
6.08 – 11.62
|
<0.001
|
6.00
|
4.54 – 8.04
|
<0.001
|
3.05
|
2.44 – 3.83
|
<0.001
|
2.72
|
2.17 – 3.43
|
<0.001
|
|
covidTRUE
|
0.92
|
0.68 – 1.25
|
0.600
|
0.97
|
0.75 – 1.25
|
0.819
|
0.53
|
0.43 – 0.65
|
<0.001
|
1.09
|
0.89 – 1.34
|
0.413
|
Sponsor type: Investigator
|
0.79
|
0.41 – 1.62
|
0.495
|
0.82
|
0.45 – 1.56
|
0.517
|
0.65
|
0.41 – 1.06
|
0.080
|
1.25
|
0.73 – 2.22
|
0.430
|
Sponsor type: Non industry
|
0.90
|
0.63 – 1.26
|
0.542
|
0.73
|
0.53 – 0.99
|
0.043
|
0.40
|
0.31 – 0.50
|
<0.001
|
0.62
|
0.49 – 0.79
|
<0.001
|
|
Observations
|
1639
|
1639
|
1639
|
1639
|
|
R2 Tjur
|
0.001
|
0.003
|
0.065
|
0.012
|
# plot of all total-effect models (indication-matched dataset) with a reference line at 1
sjPlot::plot_models(indication_total_models, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

Multiple imputation
Phase, sample size (for 3 trials) and sponsor type have missing values. Here only sponsor type is to be imputed.
Main dataset
Use the mice command to generate an initial predictor matrix (indicating which column variables are used to predict the row variables) without making any imputations.
# Fix the RNG state so the imputations generated below are reproducible.
set.seed(5)
# Dry run (maxit = 0): no imputations are made; it is only used to obtain the
# default predictor matrix. Argument names are spelled out in full rather than
# relying on partial matching (`print`, `$pred`).
mice_in <- mice::mice(main_for_mice, maxit = 0, printFlag = FALSE)
# mice_in <- mice::mice(main_dataset, maxit = 0, print = FALSE)
predictor_matrix <- mice_in$predictorMatrix
# warning message: Number of logged events: 1

# Pairwise correlations between the outcomes and the adjustment variables.
# Every column is passed through as.numeric() first; rounded to 2 decimals.
correlation_matrix <- round(
  cor(
    vapply(
      main_for_mice[, c(myvars, main_total_adjustment)],
      as.numeric,
      numeric(nrow(main_for_mice))
    ),
    use = "pairwise.complete.obs"
  ),
  2
)
# correlation_matrix <- round(cor(na.omit(values)), 2)
# keep only upper triangle
correlation_matrix[lower.tri(correlation_matrix)] <- NA
melted_correlation_matrix <- reshape2::melt(correlation_matrix)
# Reverse the level order of Var2 (the y axis variable).
melted_correlation_matrix$Var2 <- factor(
  melted_correlation_matrix$Var2,
  levels = rev(levels(melted_correlation_matrix$Var2)),
  ordered = TRUE
)
# labels_plot <- levels(melted_correlation_matrix[,1])
# labels_plot <- labels[levels(melted_correlation_matrix[,1]),]$short
# Axis labels: variable names with underscores removed, named by the original
# level so the vector has the same shape as before.
labels_plot <- levels(melted_correlation_matrix$Var1)
labels_plot <- setNames(gsub("_", "", labels_plot), labels_plot)
ggplot(
  data = melted_correlation_matrix,
  aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile(color = "white") +
  xlab("") +
  ylab("") +
  scale_fill_gradient2(
    low = "darkorchid4", high = "green3", mid = "white", midpoint = 0,
    na.value = "white", limits = c(-1, 1), name = "Correlation"
  ) +
  scale_x_discrete(labels = labels_plot) +
  scale_y_discrete(labels = rev(labels_plot)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    # panel.background = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.7, 0.7),
    legend.direction = "horizontal"
  ) +
  guides(
    fill = guide_colorbar(
      barwidth = 7, barheight = 1,
      title.position = "top", title.hjust = 0.5
    )
  )

# quickpred: quick selection procedure of predictors
# select predictors according to data relations with a minimum correlation of ρ = 0.25
# (argument and element names written out in full instead of the partially
# matched `pred` / `print` / `$pred`)
mice_in2 <- mice::mice(
  main_for_mice,
  predictorMatrix = mice::quickpred(main_for_mice, mincor = 0.25),
  printFlag = FALSE
)
mice_in2$predictorMatrix
Modify the predictor matrix such that only sponsor type is imputed, using all variables.
# Use `covid` as a predictor everywhere; its own row needs no special handling
# because the next statement zeroes every row other than sponsor_type.
predictor_matrix[, "covid"] <- 1
# Only the sponsor_type row keeps its predictors.
predictor_matrix[rownames(predictor_matrix) != "sponsor_type", ] <- 0
Generate 10 imputed datasets using chained equations (using package mice).
# First run with the default methods: every incomplete variable is still
# imputed here (see the printed imputations below); the methods are restricted
# to sponsor_type further down. Full argument names replace the partially
# matched `pred` / `print`.
main_mice <- mice::mice(
  main_for_mice,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 50
# List the components and class of the object returned by mice::mice().
attributes(main_mice)
## $names
## [1] "data" "imp" "m" "where"
## [5] "blocks" "call" "nmis" "method"
## [9] "predictorMatrix" "visitSequence" "formulas" "post"
## [13] "blots" "ignore" "seed" "iteration"
## [17] "lastSeedValue" "chainMean" "chainVar" "loggedEvents"
## [21] "version" "date"
##
## $class
## [1] "mids"
Original data:
# First rows of the data as passed to mice (before imputation).
head(main_mice$data)
Imputed datasets:
# First rows of the imputed values for every variable, one column per imputation.
map(main_mice$imp, head)
## $control_arm
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $randomisation
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $blinding
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $prospective
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $source_registry
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $phase_clean
## 1 2 3 4 5 6 7 8
## 758 Phase 3 Phase 3 Phase 1 Phase 3 Undefined Phase 1 Phase 2 Phase 3
## 775 Phase 2 Phase 4 Phase 4 Undefined Phase 2 Phase 1 Phase 2 Phase 1
## 790 Phase 3 Phase 2 Undefined Phase 2 Phase 2 Phase 2 Phase 3 Phase 4
## 800 Phase 2 Undefined Phase 4 Undefined Phase 2 Phase 2 Phase 2 Phase 3
## 802 Phase 2 Undefined Phase 3 Phase 2 Phase 3 Phase 3 Phase 4 Phase 3
## 809 Phase 4 Phase 1 Phase 2 Phase 2 Phase 2 Phase 2 Phase 3 Phase 3
## 9 10
## 758 Phase 4 Phase 4
## 775 Phase 4 Phase 2
## 790 Phase 2 Phase 3
## 800 Phase 3 Undefined
## 802 Phase 1 Phase 3
## 809 Phase 2 Phase 3
##
## $region_Africa
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_N_America
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_L_America
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Asia
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Europe
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Oceania
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $multicentre
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $primary_purpose
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $sponsor_type
## 1 2 3 4 5
## 133 Non industry Non industry Non industry Non industry Non industry
## 145 Non industry Investigator Non industry Investigator Non industry
## 227 Industry Industry Industry Non industry Industry
## 272 Non industry Non industry Non industry Investigator Non industry
## 765 Industry Non industry Non industry Industry Non industry
## 766 Non industry Non industry Non industry Non industry Non industry
## 6 7 8 9 10
## 133 Non industry Non industry Non industry Non industry Non industry
## 145 Investigator Non industry Non industry Industry Non industry
## 227 Industry Non industry Industry Non industry Industry
## 272 Non industry Non industry Non industry Non industry Non industry
## 765 Investigator Industry Industry Non industry Industry
## 766 Non industry Non industry Non industry Non industry Non industry
##
## $sample_size
## 1 2 3 4 5 6 7 8
## 1646 2.995732 6.366470 6.620073 6.492240 3.761200 4.65396 2.708050 3.465736
## 1648 4.094345 5.703782 5.247024 3.401197 3.465736 10.15813 3.401197 4.394449
## 9 10
## 1646 3.401197 4.248495
## 1648 3.401197 4.094345
##
## $vaccine
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $conventional
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $traditional
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $subject_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No Yes Yes No No No No No No Yes
## 2 Yes Yes No No Yes No No Yes Yes No
## 3 Yes Yes Yes No No Yes No No Yes No
## 6 Yes Yes No Yes Yes No Yes No No No
## 8 Yes Yes Yes No No Yes Yes No No Yes
## 10 No No Yes No No No No No Yes No
##
## $caregiver_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No Yes No Yes No No Yes Yes No
## 2 No No No No No No No Yes No No
## 3 No Yes No No No No Yes Yes No No
## 6 No Yes No No No No No No No No
## 8 No No No Yes No No No No No No
## 10 No No No No No No Yes No No Yes
##
## $investigator_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No Yes No No Yes No No No No
## 2 No Yes No No No Yes Yes Yes No Yes
## 3 Yes No No No No No No No No No
## 6 No No No No No No No No No Yes
## 8 Yes No Yes No Yes No No No No No
## 10 No No No No No No No No No No
##
## $outcome_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No Yes No No No Yes Yes No No No
## 2 No No Yes No No No No No No No
## 3 No No No No No No No No No Yes
## 6 Yes No No No No No No No Yes No
## 8 No No No No Yes Yes No Yes No No
## 10 No No No No No No Yes No No No
##
## $analyst_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No No No No No No No No No
## 2 No No No No No No No No No No
## 3 No No No No No No No No No No
## 6 No No No No No No No No No No
## 8 No No No No No No No No No No
## 10 No No No No No No No No No No
##
## $covid
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $start_year
## 1 2 3 4 5 6 7 8 9 10
## 32 2020 2019 2020 2020 2018 2020 2020 2019 2019 2020
## 34 2020 2019 2020 2019 2020 2019 2020 2019 2019 2019
## 35 2020 2020 2019 2020 2018 2020 2020 2020 2019 2020
## 48 2020 2018 2020 2019 2018 2020 2020 2020 2020 2020
## 60 2020 2019 2020 2019 2018 2019 2020 2020 2019 2019
## 66 2020 2020 2020 2020 2020 2019 2019 2020 2019 2019
# In order to get the third imputed data set, use the complete() function
# c3 <- complete(imp, 3)
# md.pattern(c3)
# c.long <- complete(imp, "long") # "broad"
Only impute sponsor type.
# Start from the methods chosen in the previous run and blank every entry
# except sponsor_type, so that only sponsor_type is imputed.
method_vector <- main_mice$method
# Select by name rather than by position: the previous `[-15]` only worked
# because sponsor_type happened to be the 15th column, and would silently
# impute a different variable if columns were added or reordered.
stopifnot("sponsor_type" %in% names(method_vector))
method_vector[names(method_vector) != "sponsor_type"] <- ""
# methods(mice)
Generate 10 imputed datasets using the updated method vector.
# Re-run the imputation with the restricted method vector. Full argument names
# replace the partially matched `pred` / `print`.
main_mice <- mice::mice(
  main_for_mice,
  method = method_vector,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 50
# Plot the imputation chains across iterations, to judge convergence.
plot(main_mice)

Check that there is no trend with further iterations and that the lines mix.
# Continue the existing chains for 40 more iterations and re-plot them.
# `printFlag` is written out instead of the partially matched `print`.
main_mice_40 <- mice::mice.mids(main_mice, maxit = 40, printFlag = FALSE)
plot(main_mice_40)

Plot of observed (blue) and imputed (red) phase:
# Strip plot of phase_clean by imputation number (.imp).
# NOTE(review): the method vector used for `main_mice` was blanked for every
# variable except sponsor_type, so phase_clean presumably has no imputed
# values in this object; confirm whether sponsor_type was intended here.
mice::stripplot(main_mice, phase_clean ~ .imp, pch = 20, cex = 2)

# Under MCAR, univariate distributions of the observed and imputed data are
# expected to be identical. Under MAR, they can be different, both in location
# and spread, but their multivariate distribution is assumed to be identical.
# Strip plot using the default formula.
mice::stripplot(main_mice)

Analysis
Logistic regression analysis on the multiply imputed data.
# For every outcome, fit the adjusted logistic model on each imputed data set
# and combine the fits with mice::pool(). Both the per-imputation fits and the
# pooled result are kept.
main_total_models_mice <- lapply(myvars, function(outcome_name) {
  # as.formula() stays inside with() so the formula is evaluated against each
  # completed data set.
  per_imputation <- with(
    main_mice,
    glm(
      as.formula(
        paste(outcome_name, "~", paste(main_total_adjustment, collapse = "+"))
      ),
      family = binomial(link = "logit")
    )
  )
  list("fit" = per_imputation, "pooled_fit" = mice::pool(per_imputation))
})
names(main_total_models_mice) <- myvars
lapply(main_total_models_mice, function(model) summary(model$pooled_fit))
## $control_arm
## term estimate std.error statistic df
## 1 (Intercept) 1.0313801 0.1130089 9.126537 1555.5636
## 2 covidTRUE 0.7899260 0.1351990 5.842692 1631.8535
## 3 sponsor_typeInvestigator -0.2952479 0.2602657 -1.134409 649.2243
## 4 sponsor_typeNon industry 0.1931466 0.1401995 1.377655 1580.7096
## p.value
## 1 0.000000e+00
## 2 6.188265e-09
## 3 2.570415e-01
## 4 1.685048e-01
##
## $randomisation
## term estimate std.error statistic df
## 1 (Intercept) 0.7908084 0.1058102 7.4738425 1578.3533
## 2 covidTRUE 0.6572158 0.1207774 5.4415445 1632.1633
## 3 sponsor_typeInvestigator -0.1900314 0.2458600 -0.7729251 746.8693
## 4 sponsor_typeNon industry 0.1102592 0.1283830 0.8588302 1592.9243
## p.value
## 1 1.283418e-13
## 2 6.085322e-08
## 3 4.398112e-01
## 4 3.905634e-01
##
## $blinding
## term estimate std.error statistic df
## 1 (Intercept) 0.100787372 0.09528738 1.05772009 1576.1721
## 2 covidTRUE 0.008050086 0.10175125 0.07911535 1632.5333
## 3 sponsor_typeInvestigator -0.381094393 0.22112888 -1.72340398 751.1032
## 4 sponsor_typeNon industry -0.423466399 0.11116590 -3.80931905 1595.8302
## p.value
## 1 0.2903451504
## 2 0.9369505710
## 3 0.0852270346
## 4 0.0001446336
##
## $prospective
## term estimate std.error statistic df
## 1 (Intercept) 1.48940017 0.1198595 12.4262132 1518.1648
## 2 covidTRUE -0.15762260 0.1126263 -1.3995181 1632.1952
## 3 sponsor_typeInvestigator -0.04690336 0.2785860 -0.1683622 482.1239
## 4 sponsor_typeNon industry -0.76373089 0.1325281 -5.7627829 1524.0708
## p.value
## 1 0.000000e+00
## 2 1.618477e-01
## 3 8.663689e-01
## 4 9.994819e-09
sum_main_total_mice <- lapply(main_total_models_mice, function(x) summary(x$pooled_fit))
# OR and 95% CI (Bonferroni corrected)
# Per outcome: pooled odds ratio for covid, lower and upper bound
# (exp(estimate -/+ z * SE)) and the pooled p-value. `z` is defined earlier in
# the file (presumably the Bonferroni-adjusted critical value; confirm).
pool_OR_main_total_mice <- lapply(sum_main_total_mice, function(x) {
  # Look the covid row and the columns up by name instead of by position
  # (formerly x[2, 2], x[2, 3], x[2, 6]), so a change in term order or in the
  # summary layout cannot silently pick the wrong cell.
  covid_row <- x[as.character(x$term) == "covidTRUE", , drop = FALSE]
  stopifnot(nrow(covid_row) == 1)
  est <- covid_row[["estimate"]]
  se <- covid_row[["std.error"]]
  # Unnamed 1 x 4 matrix: OR, lower, upper, p-value (same shape as before).
  matrix(
    c(exp(c(est, est - z * se, est + z * se)), covid_row[["p.value"]]),
    nrow = 1
  )
})
# Bind once, print the table, then tag it with the analysis label.
pool_OR_main_total_mice <- do.call(rbind.data.frame, pool_OR_main_total_mice)
pool_OR_main_total_mice
pool_OR_main_total_mice$Analysis <- "Main total (6)"
# sjPlot::tab_model(main_total_models_mice)
# sjPlot::plot_models(main_total_models_mice, prefix.labels = "varname") + theme_bw()
fmi: fraction of information about the coefficients missing due to nonresponse
lambda: proportion of the variation attributable to the missing data
# Density plot of sponsor_type from the imputation object.
mice::densityplot(main_mice, ~ sponsor_type)

Compare the complete-case analysis with the MICE analysis for each outcome
Control arm
# Complete-case fit for control_arm: glm() drops rows with missing values.
summary(
  glm(
    as.formula(
      paste("control_arm", "~", paste(main_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit"),
    data = main_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("control_arm", "~", paste(main_total_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0671 0.5013 0.5423 0.7178 0.8425
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0564 0.1147 9.212 < 2e-16 ***
## covidTRUE 0.7861 0.1369 5.741 9.41e-09 ***
## sponsor_typeInvestigator -0.2031 0.2749 -0.739 0.460
## sponsor_typeNon industry 0.1683 0.1411 1.192 0.233
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1535.7 on 1607 degrees of freedom
## Residual deviance: 1495.0 on 1604 degrees of freedom
## (54 observations deleted due to missingness)
## AIC: 1503
##
## Number of Fisher Scoring iterations: 4
# Same control_arm model fitted on every imputed data set, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(
      paste("control_arm", "~", paste(main_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled table stored on the object, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
Randomisation
# Complete-case fit for randomisation: glm() drops rows with missing values.
summary(
  glm(
    as.formula(
      paste("randomisation", "~", paste(main_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit"),
    data = main_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("randomisation", "~", paste(main_total_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8693 0.6189 0.6189 0.8259 0.9064
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.80510 0.10714 7.515 5.71e-14 ***
## covidTRUE 0.65540 0.12202 5.371 7.83e-08 ***
## sponsor_typeInvestigator -0.12772 0.25781 -0.495 0.620
## sponsor_typeNon industry 0.09512 0.12910 0.737 0.461
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1770.1 on 1607 degrees of freedom
## Residual deviance: 1737.0 on 1604 degrees of freedom
## (54 observations deleted due to missingness)
## AIC: 1745
##
## Number of Fisher Scoring iterations: 4
# Same randomisation model fitted on every imputed data set, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(
      paste("randomisation", "~", paste(main_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled table stored on the object, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
Blinding
# Complete-case fit for blinding: glm() drops rows with missing values.
summary(
  glm(
    as.formula(
      paste("blinding", "~", paste(main_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit"),
    data = main_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("blinding", "~", paste(main_total_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.249 -1.051 -1.042 1.309 1.319
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.16579 0.09632 1.721 0.0852 .
## covidTRUE -0.02227 0.10260 -0.217 0.8282
## sponsor_typeInvestigator -0.19603 0.22560 -0.869 0.3849
## sponsor_typeNon industry -0.46985 0.11165 -4.208 2.57e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2220.2 on 1607 degrees of freedom
## Residual deviance: 2201.0 on 1604 degrees of freedom
## (54 observations deleted due to missingness)
## AIC: 2209
##
## Number of Fisher Scoring iterations: 4
# Same blinding model fitted on every imputed data set, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(
      paste("blinding", "~", paste(main_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled table stored on the object, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
Prospective registration
# Complete-case fit for prospective: glm() drops rows with missing values.
summary(
  glm(
    as.formula(
      paste("prospective", "~", paste(main_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit"),
    data = main_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("prospective", "~", paste(main_total_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9748 -1.4106 0.6174 0.8728 0.9608
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.5974 0.1238 12.901 < 2e-16 ***
## covidTRUE -0.2355 0.1147 -2.054 0.040 *
## sponsor_typeInvestigator 0.1990 0.3054 0.651 0.515
## sponsor_typeNon industry -0.8285 0.1349 -6.141 8.18e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1917.7 on 1607 degrees of freedom
## Residual deviance: 1857.7 on 1604 degrees of freedom
## (54 observations deleted due to missingness)
## AIC: 1865.7
##
## Number of Fisher Scoring iterations: 4
# Same prospective model fitted on every imputed data set, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(
      paste("prospective", "~", paste(main_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled table stored on the object, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
Indication-matched dataset
Use the mice command to generate an initial predictor matrix (indicating which column variables are used to predict the row variables) without making any imputations.
# Fix the RNG state so the imputations generated below are reproducible.
set.seed(5)
# Dry run (maxit = 0): no imputations are made; it is only used to obtain the
# default predictor matrix. Argument names are spelled out in full rather than
# relying on partial matching (`print`, `$pred`).
mice_in <- mice::mice(indication_for_mice, maxit = 0, printFlag = FALSE)
# mice_in <- mice::mice(indication_dataset, maxit = 0, print = FALSE)
predictor_matrix <- mice_in$predictorMatrix
# warning message: Number of logged events: 1

# Pairwise correlations between the outcomes and the adjustment variables.
# Every column is passed through as.numeric() first; rounded to 2 decimals.
correlation_matrix <- round(
  cor(
    vapply(
      indication_for_mice[, c(myvars, indication_total_adjustment)],
      as.numeric,
      numeric(nrow(indication_for_mice))
    ),
    use = "pairwise.complete.obs"
  ),
  2
)
# correlation_matrix <- round(cor(na.omit(values)), 2)
# keep only upper triangle
correlation_matrix[lower.tri(correlation_matrix)] <- NA
melted_correlation_matrix <- reshape2::melt(correlation_matrix)
# Reverse the level order of Var2 (the y axis variable).
melted_correlation_matrix$Var2 <- factor(
  melted_correlation_matrix$Var2,
  levels = rev(levels(melted_correlation_matrix$Var2)),
  ordered = TRUE
)
# Axis labels: variable names with underscores removed, named by the original
# level so the vector has the same shape as before.
labels_plot <- levels(melted_correlation_matrix$Var1)
labels_plot <- setNames(gsub("_", "", labels_plot), labels_plot)
ggplot(
  data = melted_correlation_matrix,
  aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile(color = "white") +
  xlab("") +
  ylab("") +
  scale_fill_gradient2(
    low = "darkorchid4", high = "green3", mid = "white", midpoint = 0,
    na.value = "white", limits = c(-1, 1), name = "Correlation"
  ) +
  scale_x_discrete(labels = labels_plot) +
  scale_y_discrete(labels = rev(labels_plot)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.7, 0.7),
    legend.direction = "horizontal"
  ) +
  guides(
    fill = guide_colorbar(
      barwidth = 7, barheight = 1,
      title.position = "top", title.hjust = 0.5
    )
  )

# quickpred: quick selection procedure of predictors
# select predictors according to data relations with a minimum correlation of ρ = 0.25
# (argument and element names written out in full instead of the partially
# matched `pred` / `print` / `$pred`)
mice_in2 <- mice::mice(
  indication_for_mice,
  predictorMatrix = mice::quickpred(indication_for_mice, mincor = 0.25),
  printFlag = FALSE
)
mice_in2$predictorMatrix
Modify the predictor matrix such that only sponsor type is imputed, using all variables.
# Use `covid` as a predictor everywhere; its own row needs no special handling
# because the next statement zeroes every row other than sponsor_type.
predictor_matrix[, "covid"] <- 1
# Only the sponsor_type row keeps its predictors.
predictor_matrix[rownames(predictor_matrix) != "sponsor_type", ] <- 0
Generate 10 imputed datasets using chained equations (using package mice).
# First run with the default methods: every incomplete variable is still
# imputed here (see the printed imputations below); the methods are restricted
# to sponsor_type further down. Full argument names replace the partially
# matched `pred` / `print`.
indication_mice <- mice::mice(
  indication_for_mice,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 50
# List the components and class of the object returned by mice::mice().
attributes(indication_mice)
## $names
## [1] "data" "imp" "m" "where"
## [5] "blocks" "call" "nmis" "method"
## [9] "predictorMatrix" "visitSequence" "formulas" "post"
## [13] "blots" "ignore" "seed" "iteration"
## [17] "lastSeedValue" "chainMean" "chainVar" "loggedEvents"
## [21] "version" "date"
##
## $class
## [1] "mids"
Original data:
# First rows of the data as passed to mice (before imputation).
head(indication_mice$data)
Imputed datasets:
# First rows of the imputed values for every variable, one column per imputation.
map(indication_mice$imp, head)
## $control_arm
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $randomisation
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $blinding
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $prospective
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $source_registry
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $phase_clean
## 1 2 3 4 5 6 7 8
## 233 Undefined Phase 4 Phase 2 Phase 2 Undefined Phase 1 Phase 4 Phase 2
## 234 Phase 4 Phase 1 Phase 2 Phase 4 Phase 4 Phase 2 Phase 2 Phase 3
## 235 Phase 4 Phase 3 Phase 1 Phase 4 Undefined Phase 3 Phase 2 Phase 3
## 663 Phase 1 Phase 4 Phase 2 Phase 3 Undefined Phase 2 Phase 3 Phase 2
## 666 Phase 3 Phase 3 Phase 2 Phase 2 Phase 3 Phase 1 Undefined Undefined
## 686 Phase 2 Phase 4 Phase 2 Phase 3 Phase 3 Phase 1 Undefined Phase 3
## 9 10
## 233 Phase 4 Phase 4
## 234 Phase 3 Phase 3
## 235 Undefined Phase 1
## 663 Phase 3 Phase 1
## 666 Phase 3 Phase 3
## 686 Phase 3 Phase 3
##
## $region_Africa
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_N_America
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_L_America
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Asia
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Europe
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $region_Oceania
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $multicentre
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $primary_purpose
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $sponsor_type
## 1 2 3 4 5
## 121 Non industry Non industry Non industry Non industry Non industry
## 179 Industry Non industry Investigator Industry Industry
## 224 Non industry Non industry Non industry Non industry Non industry
## 686 Industry Non industry Non industry Industry Non industry
## 689 Non industry Non industry Non industry Non industry Non industry
## 690 Industry Industry Industry Non industry Non industry
## 6 7 8 9 10
## 121 Non industry Non industry Non industry Non industry Non industry
## 179 Non industry Industry Industry Non industry Investigator
## 224 Investigator Non industry Non industry Non industry Non industry
## 686 Industry Industry Non industry Non industry Non industry
## 689 Industry Industry Non industry Non industry Industry
## 690 Industry Non industry Non industry Industry Non industry
##
## $sample_size
## 1 2 3 4 5 6 7 8
## 350 2.995732 7.852828 4.605170 4.382027 8.665613 3.688879 5.703782 5.075174
## 679 2.995732 4.094345 4.691348 4.276666 5.521461 5.937536 5.703782 6.063785
## 1640 2.995732 4.787492 5.808142 4.276666 6.135565 5.937536 4.521789 5.075174
## 1642 4.605170 4.787492 5.808142 5.598422 3.637586 4.094345 5.703782 5.075174
## 9 10
## 350 5.298317 3.401197
## 679 4.605170 2.302585
## 1640 2.995732 2.302585
## 1642 4.094345 2.302585
##
## $vaccine
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $conventional
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $traditional
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $subject_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 Yes No No Yes Yes Yes Yes Yes Yes No
## 3 Yes No Yes No No Yes Yes No Yes Yes
## 5 Yes No Yes No Yes No No No Yes No
## 6 Yes Yes Yes No No No No No Yes No
## 7 No Yes Yes Yes Yes No No Yes No Yes
## 8 Yes Yes No No Yes No No Yes No Yes
##
## $caregiver_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No Yes Yes No No Yes No No No Yes
## 3 Yes No No Yes No Yes No No Yes No
## 5 No No No No No No No Yes Yes No
## 6 Yes No No No No Yes No No No No
## 7 No No No No No No No No Yes No
## 8 Yes No No No No Yes Yes Yes No Yes
##
## $investigator_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No Yes No No No No No No No No
## 3 Yes No No No No No Yes Yes No Yes
## 5 Yes No No No No No Yes Yes No Yes
## 6 Yes No No Yes No Yes Yes Yes Yes Yes
## 7 No No No No Yes No No No No No
## 8 Yes Yes No No No No No No Yes No
##
## $outcome_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 Yes No No Yes No Yes No No No No
## 3 No No No Yes No Yes Yes Yes Yes Yes
## 5 No No No No Yes No No Yes No No
## 6 Yes No No No No No No No No No
## 7 No Yes No No Yes No No No No No
## 8 No Yes Yes No No No Yes No No No
##
## $analyst_blind
## 1 2 3 4 5 6 7 8 9 10
## 1 No No No No No No No No No No
## 3 No No No No No No No No No No
## 5 No No No No No No No No No No
## 6 No No No No No No No No No No
## 7 No No No No No No No No No No
## 8 No No No No No No No No No No
##
## $covid
## [1] 1 2 3 4 5 6 7 8 9 10
## <0 rows> (or 0-length row.names)
##
## $start_year
## 1 2 3 4 5 6 7 8 9 10
## 32 2019 2020 2020 2020 2020 2018 2019 2020 2018 2016
## 36 2017 2020 2020 2020 2017 2016 2019 2017 2013 2020
## 41 2017 2020 2019 2020 2019 2018 2017 2019 2020 2020
## 63 2019 2016 2020 2020 2019 2016 2019 2020 2018 2020
## 66 2017 2020 2020 2020 2017 2020 2018 2019 2020 2020
## 72 2017 2016 2020 2016 2020 2018 2020 2020 2013 2020
# In order to get the third imputed data set, use the complete() function
Only impute sponsor type.
# Start from the methods chosen in the previous run and blank every entry
# except sponsor_type, so that only sponsor_type is imputed.
method_vector <- indication_mice$method
# Select by name rather than by position: the previous `[-15]` only worked
# because sponsor_type happened to be the 15th column, and would silently
# impute a different variable if columns were added or reordered.
stopifnot("sponsor_type" %in% names(method_vector))
method_vector[names(method_vector) != "sponsor_type"] <- ""
# methods(mice)
Generate 10 imputed datasets using the updated method vector.
# Re-run the imputation with the restricted method vector. Full argument names
# replace the partially matched `pred` / `print`.
indication_mice <- mice::mice(
  indication_for_mice,
  method = method_vector,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 50
# Plot the imputation chains across iterations, to judge convergence.
plot(indication_mice)

Check that there is no trend with further iterations and that the lines mix.
# Continue the existing chains for 40 more iterations and re-plot them.
# `printFlag` is written out instead of the partially matched `print`.
indication_mice_40 <- mice::mice.mids(indication_mice, maxit = 40, printFlag = FALSE)
plot(indication_mice_40)

Plot of observed (blue) and imputed (red) phase:
# Strip plot of phase_clean by imputation number (.imp).
# NOTE(review): the method vector used for `indication_mice` was blanked for
# every variable except sponsor_type, so phase_clean presumably has no imputed
# values in this object; confirm whether sponsor_type was intended here.
mice::stripplot(indication_mice, phase_clean ~ .imp, pch = 20, cex = 2)

# Under MCAR, univariate distributions of the observed and imputed data are
# expected to be identical. Under MAR, they can be different, both in location
# and spread, but their multivariate distribution is assumed to be identical.
# Strip plot using the default formula.
mice::stripplot(indication_mice)

Analysis
Logistic regression analysis on the multiply imputed data.
# For every outcome, fit the adjusted logistic model on each imputed data set
# and combine the fits with mice::pool(). Both the per-imputation fits and the
# pooled result are kept.
indication_total_models_mice <- lapply(myvars, function(outcome_name) {
  # as.formula() stays inside with() so the formula is evaluated against each
  # completed data set.
  per_imputation <- with(
    indication_mice,
    glm(
      as.formula(
        paste(outcome_name, "~", paste(indication_total_adjustment, collapse = "+"))
      ),
      family = binomial(link = "logit")
    )
  )
  list("fit" = per_imputation, "pooled_fit" = mice::pool(per_imputation))
})
names(indication_total_models_mice) <- myvars
lapply(indication_total_models_mice, function(model) summary(model$pooled_fit))
## $control_arm
## term estimate std.error statistic df p.value
## 1 (Intercept) 2.09467435 0.1630852 12.8440478 1635.510 0.0000000
## 2 covidTRUE -0.09703931 0.1521018 -0.6379892 1638.837 0.5235698
## 3 sponsor_typeInvestigator -0.20713098 0.3467191 -0.5974028 1588.804 0.5503236
## 4 sponsor_typeNon industry -0.07485039 0.1756249 -0.4261946 1634.875 0.6700222
##
## $randomisation
## term estimate std.error statistic df p.value
## 1 (Intercept) 1.7735724 0.1444615 12.2771272 1636.083 0.00000000
## 2 covidTRUE -0.0406002 0.1301694 -0.3119027 1638.837 0.75515413
## 3 sponsor_typeInvestigator -0.1828022 0.3151404 -0.5800660 1602.453 0.56195166
## 4 sponsor_typeNon industry -0.2912784 0.1545898 -1.8842020 1635.489 0.05971489
##
## $blinding
## term estimate std.error statistic df p.value
## 1 (Intercept) 1.0926804 0.1143869 9.552493 1634.362 0.000000e+00
## 2 covidTRUE -0.6517555 0.1023601 -6.367279 1638.777 2.492386e-10
## 3 sponsor_typeInvestigator -0.4030700 0.2430830 -1.658157 1625.728 9.747860e-02
## 4 sponsor_typeNon industry -0.8962649 0.1202718 -7.451997 1630.631 1.483258e-13
##
## $prospective
## term estimate std.error statistic df
## 1 (Intercept) 1.01030138 0.1162402 8.6914985 1638.120
## 2 covidTRUE 0.09736661 0.1064722 0.9144792 1638.837
## 3 sponsor_typeInvestigator 0.21287662 0.2817292 0.7556071 1637.035
## 4 sponsor_typeNon industry -0.48652144 0.1252574 -3.8841727 1637.490
## p.value
## 1 0.0000000000
## 2 0.3605996495
## 3 0.4499934889
## 4 0.0001067652
sum_indication_total_mice <- lapply(indication_total_models_mice, function(x) summary(x$pooled_fit))
# OR and 95% CI (Bonferroni corrected)
# Per outcome: pooled odds ratio for covid, lower and upper bound
# (exp(estimate -/+ z * SE)) and the pooled p-value. `z` is defined earlier in
# the file (presumably the Bonferroni-adjusted critical value; confirm).
pool_OR_indication_total_mice <- lapply(sum_indication_total_mice, function(x) {
  # Look the covid row and the columns up by name instead of by position
  # (formerly x[2, 2], x[2, 3], x[2, 6]), so a change in term order or in the
  # summary layout cannot silently pick the wrong cell.
  covid_row <- x[as.character(x$term) == "covidTRUE", , drop = FALSE]
  stopifnot(nrow(covid_row) == 1)
  est <- covid_row[["estimate"]]
  se <- covid_row[["std.error"]]
  # Unnamed 1 x 4 matrix: OR, lower, upper, p-value (same shape as before).
  matrix(
    c(exp(c(est, est - z * se, est + z * se)), covid_row[["p.value"]]),
    nrow = 1
  )
})
pool_OR_indication_total_mice <- do.call(rbind.data.frame, pool_OR_indication_total_mice)
pool_OR_indication_total_mice$Analysis <- "IM total (7)"
fmi: fraction of information about the coefficients missing due to nonresponse
lambda: proportion of the variation attributable to the missing data
# Density plot of sponsor_type from the imputation object.
mice::densityplot(indication_mice, ~ sponsor_type)

Compare the complete-case analysis with the MICE analysis for each outcome
Control arm
# Complete-case fit for control_arm: glm() drops rows with missing values.
summary(
  glm(
    as.formula(
      paste("control_arm", "~", paste(indication_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit"),
    data = indication_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("control_arm", "~", paste(indication_total_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1126 0.4766 0.5016 0.5207 0.5529
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.11802 0.16488 12.846 <2e-16 ***
## covidTRUE -0.08003 0.15282 -0.524 0.600
## sponsor_typeInvestigator -0.23704 0.34704 -0.683 0.495
## sponsor_typeNon industry -0.10833 0.17774 -0.609 0.542
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1204.1 on 1638 degrees of freedom
## Residual deviance: 1203.1 on 1635 degrees of freedom
## (18 observations deleted due to missingness)
## AIC: 1211.1
##
## Number of Fisher Scoring iterations: 4
# Same control_arm model fitted on every imputed data set, then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(
      paste("control_arm", "~", paste(indication_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled table stored on the object, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
Randomisation
# Complete-case fit for randomisation: glm() drops rows with missing values.
summary(
  glm(
    as.formula(
      paste("randomisation", "~", paste(indication_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit"),
    data = indication_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("randomisation", "~", paste(indication_total_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9725 0.5554 0.6417 0.6505 0.6505
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.79103 0.14566 12.296 <2e-16 ***
## covidTRUE -0.02994 0.13063 -0.229 0.8187
## sponsor_typeInvestigator -0.20445 0.31530 -0.648 0.5167
## sponsor_typeNon industry -0.31543 0.15604 -2.021 0.0432 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1523.7 on 1638 degrees of freedom
## Residual deviance: 1519.3 on 1635 degrees of freedom
## (18 observations deleted due to missingness)
## AIC: 1527.3
##
## Number of Fisher Scoring iterations: 4
# Same randomisation model fitted on every imputed data set, then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(
      paste("randomisation", "~", paste(indication_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled table stored on the object, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
Blinding
# Complete-case fit for blinding: glm() drops rows with missing values.
summary(
  glm(
    as.formula(
      paste("blinding", "~", paste(indication_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit"),
    data = indication_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("blinding", "~", paste(indication_total_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6722 -1.2006 0.7533 1.0985 1.3712
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1144 0.1151 9.682 < 2e-16 ***
## covidTRUE -0.6333 0.1026 -6.171 6.77e-10 ***
## sponsor_typeInvestigator -0.4266 0.2435 -1.752 0.0798 .
## sponsor_typeNon industry -0.9259 0.1211 -7.648 2.04e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2264.3 on 1638 degrees of freedom
## Residual deviance: 2155.6 on 1635 degrees of freedom
## (18 observations deleted due to missingness)
## AIC: 2163.6
##
## Number of Fisher Scoring iterations: 4
# Same blinding model fitted on every imputed data set, then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(
      paste("blinding", "~", paste(indication_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled table stored on the object, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
Prospective registration
# Complete-case fit for prospective: glm() drops rows with missing values.
summary(
  glm(
    as.formula(
      paste("prospective", "~", paste(indication_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit"),
    data = indication_dataset
  )
)
##
## Call:
## glm(formula = as.formula(paste("prospective", "~", paste(indication_total_adjustment,
## collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7604 -1.4081 0.7912 0.9298 0.9631
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.00097 0.11622 8.613 < 2e-16 ***
## covidTRUE 0.08724 0.10658 0.819 0.413071
## sponsor_typeInvestigator 0.22247 0.28183 0.789 0.429894
## sponsor_typeNon industry -0.47339 0.12543 -3.774 0.000161 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2070.4 on 1638 degrees of freedom
## Residual deviance: 2050.2 on 1635 degrees of freedom
## (18 observations deleted due to missingness)
## AIC: 2058.2
##
## Number of Fisher Scoring iterations: 4
# Same prospective model fitted on every imputed data set, then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(
      paste("prospective", "~", paste(indication_total_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled table stored on the object, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
E-value (analysis 8)
E-values are calculated below.
# E-values for four odds ratios: direct and total effects on control arm and
# on randomisation (main comparison). The point estimates and interval limits
# are typed in by hand -- presumably copied from the earlier main direct/total
# analyses; re-check them whenever those models are re-run.
#
# Settings shared by every call:
#   rare = 0  the outcome is not treated as rare (the flag would be 1 if the
#             outcome were <15% at end of follow up)
#   true = 1  odds ratio for which we want to calculate the e-value (i.e. H0)
e_value_for_or <- function(est, lo, hi) {
  EValue::evalues.OR(est = est, lo = lo, hi = hi, rare = 0, true = 1)
}

# control arm main direct
control_direct_e <- e_value_for_or(est = 2.18, lo = 1.48, hi = 3.21)
# randomisation main direct
randomisation_direct_e <- e_value_for_or(est = 1.9, lo = 1.34, hi = 2.69)
# control main total
control_total_e <- e_value_for_or(est = 2.2, lo = 1.57, hi = 3.08)
# randomisation main total
randomisation_total_e <- e_value_for_or(est = 1.93, lo = 1.42, hi = 2.6)

# Stack the second row of each result into one table, label the rows, and
# show it without the `upper` column.
bind_rows(
  control_direct_e[2, ],
  randomisation_direct_e[2, ],
  control_total_e[2, ],
  randomisation_total_e[2, ]
) %>%
  mutate(
    Analysis = c(
      "Control arm direct effect",
      "Randomisation direct effect",
      "Control arm total effect",
      "Randomisation total effect"
    )
  ) %>%
  select(Analysis, everything(), -upper) %>%
  rename(Point = point, Lower = lower)
Geographic regions as confounders (analysis 9)
Analyses 6 and 7 are repeated with geographical regions included as covariates.
# analysis 6 repeated
# `locations` holds the names of the columns from region_Africa through
# region_Oceania in main_dataset; they are appended to the main total-effect
# adjustment set.
locations <- names(select(main_dataset, region_Africa:region_Oceania))
region_sens_adjustment <- c(main_total_adjustment, locations)

# One logistic model per outcome in `myvars`, fitted through with() on
# main_mice and pooled. Both the per-fit results and the pooled object are
# kept. The argument keeps the short name `x`: with() evaluates the glm() call
# against the data first, so a more descriptive name could be shadowed by a
# column of the same name.
main_sens_models_mice <- lapply(myvars, function(x) {
  fit <- with(
    main_mice,
    glm(
      reformulate(region_sens_adjustment, response = x),
      family = binomial(link = "logit")
    )
  )
  list("fit" = fit, "pooled_fit" = mice::pool(fit))
})
names(main_sens_models_mice) <- myvars

# Print the pooled coefficient table of every outcome.
lapply(main_sens_models_mice, function(model) summary(model$pooled_fit))
## $control_arm
## term estimate std.error statistic df
## 1 (Intercept) 0.92463459 0.2255080 4.1002295 1521.5804
## 2 covidTRUE 0.80549532 0.1393394 5.7808154 1626.6395
## 3 sponsor_typeInvestigator -0.33981929 0.2770554 -1.2265392 562.7597
## 4 sponsor_typeNon industry 0.15503660 0.1637808 0.9466106 1489.5462
## 5 region_AfricaYes 1.04978281 0.3827758 2.7425524 1626.7237
## 6 region_N_AmericaYes -0.25923654 0.1710189 -1.5158359 1615.8124
## 7 region_L_AmericaYes 0.54001880 0.2497859 2.1619262 1626.2323
## 8 region_AsiaYes 0.22296493 0.1588778 1.4033740 1625.2715
## 9 region_EuropeYes -0.01444888 0.1613035 -0.0895757 1616.3551
## 10 region_OceaniaYes -0.02872399 0.2848034 -0.1008555 1626.2243
## p.value
## 1 4.345463e-05
## 2 8.894384e-09
## 3 2.205088e-01
## 4 3.439908e-01
## 5 6.163213e-03
## 6 1.297564e-01
## 7 3.076932e-02
## 8 1.606964e-01
## 9 9.286355e-01
## 10 9.196776e-01
##
## $randomisation
## term estimate std.error statistic df
## 1 (Intercept) 0.71700211 0.2103646 3.40837758 1549.969
## 2 covidTRUE 0.66224084 0.1247963 5.30657231 1626.803
## 3 sponsor_typeInvestigator -0.22181111 0.2613072 -0.84885190 655.925
## 4 sponsor_typeNon industry 0.06337223 0.1504714 0.42115794 1524.426
## 5 region_AfricaYes 0.76740848 0.3124267 2.45628309 1626.824
## 6 region_N_AmericaYes -0.37650700 0.1584059 -2.37684945 1618.033
## 7 region_L_AmericaYes 0.82777473 0.2392496 3.45987956 1626.464
## 8 region_AsiaYes 0.21476724 0.1467190 1.46379933 1625.267
## 9 region_EuropeYes -0.02558895 0.1486251 -0.17217112 1619.364
## 10 region_OceaniaYes -0.01308082 0.2671611 -0.04896231 1626.344
## p.value
## 1 6.702483e-04
## 2 1.270782e-07
## 3 3.962734e-01
## 4 6.736991e-01
## 5 1.414211e-02
## 6 1.757693e-02
## 7 5.543722e-04
## 8 1.434422e-01
## 9 8.633245e-01
## 10 9.609553e-01
##
## $blinding
## term estimate std.error statistic df
## 1 (Intercept) 0.12718969 0.1783292 0.7132298 1564.9693
## 2 covidTRUE -0.04401346 0.1056857 -0.4164560 1626.8380
## 3 sponsor_typeInvestigator -0.31023022 0.2323944 -1.3349299 755.6118
## 4 sponsor_typeNon industry -0.34618844 0.1299903 -2.6631868 1554.3899
## 5 region_AfricaYes 0.33955767 0.2165902 1.5677428 1626.8380
## 6 region_N_AmericaYes -0.01912501 0.1367622 -0.1398413 1619.8623
## 7 region_L_AmericaYes 0.75217242 0.1743395 4.3144121 1626.2338
## 8 region_AsiaYes -0.18990030 0.1200960 -1.5812372 1625.9589
## 9 region_EuropeYes -0.18954087 0.1256447 -1.5085464 1621.4527
## 10 region_OceaniaYes 0.06774484 0.2329078 0.2908654 1626.4096
## p.value
## 1 0.475809928
## 2 0.677131273
## 3 0.182301346
## 4 0.007820362
## 5 0.117135643
## 6 0.888802739
## 7 0.000016966
## 8 0.114018298
## 9 0.131609552
## 10 0.771191323
##
## $prospective
## term estimate std.error statistic df
## 1 (Intercept) 0.56771304 0.2341241 2.4248376 1543.6665
## 2 covidTRUE -0.02961938 0.1161146 -0.2550874 1626.7412
## 3 sponsor_typeInvestigator 0.38610170 0.2880821 1.3402487 475.9444
## 4 sponsor_typeNon industry -0.32474564 0.1477735 -2.1975899 1465.0033
## 5 region_AfricaYes 0.59391237 0.2847599 2.0856602 1626.8380
## 6 region_N_AmericaYes 0.87886175 0.1845045 4.7633619 1618.0177
## 7 region_L_AmericaYes -0.27258522 0.2136349 -1.2759393 1624.6454
## 8 region_AsiaYes 0.25516163 0.1703336 1.4980107 1623.9924
## 9 region_EuropeYes 0.52142650 0.1713305 3.0433961 1620.9812
## 10 region_OceaniaYes 2.66495323 0.7242927 3.6793870 1626.8065
## p.value
## 1 1.542936e-02
## 2 7.986878e-01
## 3 1.808040e-01
## 4 2.813438e-02
## 5 3.716492e-02
## 6 2.074215e-06
## 7 2.021594e-01
## 8 1.343248e-01
## 9 2.376853e-03
## 10 2.414117e-04
# Pooled coefficient summaries (one data frame per outcome) of the
# region-adjusted main models.
sum_main_sens_mice <- lapply(main_sens_models_mice, function(x) summary(x$pooled_fit))

# Odds ratio, interval limits and p-value for the second coefficient row
# (covidTRUE in the summaries printed above). The interval is
# exp(estimate -/+ z * std.error), where `z` is the critical value defined
# earlier in the file (Bonferroni corrected, per the original comment).
# Columns are addressed by name rather than by position so that a change in
# the summary layout cannot silently pick the wrong column.
pool_OR_main_sens_mice <- lapply(sum_main_sens_mice, function(x) {
  log_or <- x$estimate[2]
  half_width <- z * x$std.error[2]
  # Unnamed 1 x 4 matrix: the stacked data frame keeps its default column
  # names, which the later bind_rows() of all OR tables relies on.
  matrix(
    c(exp(c(log_or, log_or - half_width, log_or + half_width)), x$p.value[2]),
    nrow = 1
  )
})

# Stack once, print the table, then tag it with its analysis label.
pool_OR_main_sens_mice <- do.call(rbind.data.frame, pool_OR_main_sens_mice)
pool_OR_main_sens_mice
pool_OR_main_sens_mice$Analysis <- "Main total sensitivity (9)"
# analysis 7 repeated
# One logistic model per outcome in `myvars`, fitted through with() on
# indication_mice and pooled; the per-fit results and the pooled object are
# both kept.
# NOTE(review): region_sens_adjustment is built from main_total_adjustment
# (see analysis 6 repeated), not from indication_total_adjustment -- confirm
# the two adjustment sets are meant to coincide here.
# The argument keeps the short name `x`: with() evaluates the glm() call
# against the data first, so a more descriptive name could be shadowed by a
# column of the same name.
indication_sens_models_mice <- lapply(myvars, function(x) {
  fit <- with(
    indication_mice,
    glm(
      reformulate(region_sens_adjustment, response = x),
      family = binomial(link = "logit")
    )
  )
  list("fit" = fit, "pooled_fit" = mice::pool(fit))
})
names(indication_sens_models_mice) <- myvars

# Print the pooled coefficient table of every outcome.
lapply(indication_sens_models_mice, function(model) summary(model$pooled_fit))
## $control_arm
## term estimate std.error statistic df
## 1 (Intercept) 1.81275402 0.2882392 6.28906031 1630.035
## 2 covidTRUE -0.07999277 0.1546623 -0.51720917 1632.837
## 3 sponsor_typeInvestigator -0.10897752 0.3524012 -0.30924276 1580.015
## 4 sponsor_typeNon industry 0.00509626 0.1887061 0.02700633 1627.869
## 5 region_AfricaYes 0.90737149 0.4311709 2.10443568 1632.837
## 6 region_N_AmericaYes -0.04506925 0.2214176 -0.20354860 1632.618
## 7 region_L_AmericaYes 0.28267199 0.2926553 0.96588706 1632.837
## 8 region_AsiaYes 0.21874074 0.2091697 1.04575728 1632.767
## 9 region_EuropeYes 0.15527358 0.2115949 0.73382475 1632.717
## 10 region_OceaniaYes 0.40407508 0.4396095 0.91916817 1632.837
## p.value
## 1 4.092813e-10
## 2 6.050802e-01
## 3 7.571776e-01
## 4 9.784580e-01
## 5 3.549297e-02
## 6 8.387316e-01
## 7 3.342437e-01
## 8 2.958279e-01
## 9 4.631609e-01
## 10 3.581434e-01
##
## $randomisation
## term estimate std.error statistic df
## 1 (Intercept) 1.53340337 0.2610196 5.8746684 1630.579
## 2 covidTRUE -0.04529754 0.1328551 -0.3409544 1632.837
## 3 sponsor_typeInvestigator -0.08131805 0.3212962 -0.2530937 1594.090
## 4 sponsor_typeNon industry -0.22883974 0.1669144 -1.3710004 1628.672
## 5 region_AfricaYes 1.15845946 0.4012314 2.8872602 1632.837
## 6 region_N_AmericaYes -0.24843756 0.1973063 -1.2591469 1632.676
## 7 region_L_AmericaYes 0.48041701 0.2693637 1.7835254 1632.837
## 8 region_AsiaYes 0.20275768 0.1882694 1.0769550 1632.779
## 9 region_EuropeYes 0.21390278 0.1904730 1.1230082 1632.759
## 10 region_OceaniaYes 0.47610945 0.3874572 1.2288052 1632.837
## p.value
## 1 5.125650e-09
## 2 7.331818e-01
## 3 8.002284e-01
## 4 1.705637e-01
## 5 3.937275e-03
## 6 2.081573e-01
## 7 7.468650e-02
## 8 2.816595e-01
## 9 2.615991e-01
## 10 2.193219e-01
##
## $blinding
## term estimate std.error statistic df
## 1 (Intercept) 1.039578786 0.1923261 5.40529152 1629.438
## 2 covidTRUE -0.656840267 0.1050519 -6.25253037 1632.837
## 3 sponsor_typeInvestigator -0.309842999 0.2486467 -1.24611742 1622.256
## 4 sponsor_typeNon industry -0.806035155 0.1325098 -6.08283241 1624.866
## 5 region_AfricaYes 0.474082324 0.2216708 2.13867695 1632.837
## 6 region_N_AmericaYes -0.034358864 0.1438686 -0.23882122 1632.318
## 7 region_L_AmericaYes 0.563505302 0.1894365 2.97463957 1632.837
## 8 region_AsiaYes -0.232913209 0.1306805 -1.78231029 1632.837
## 9 region_EuropeYes -0.003958487 0.1343984 -0.02945337 1632.546
## 10 region_OceaniaYes 0.380242106 0.2762278 1.37655274 1632.837
## p.value
## 1 7.429188e-08
## 2 5.144472e-10
## 3 2.129012e-01
## 4 1.469325e-09
## 5 3.260984e-02
## 6 8.112742e-01
## 7 2.976381e-03
## 8 7.488441e-02
## 9 9.765066e-01
## 10 1.688393e-01
##
## $prospective
## term estimate std.error statistic df
## 1 (Intercept) 0.22922086 0.2093076 1.0951389 1631.216
## 2 covidTRUE 0.25804047 0.1105096 2.3350057 1632.837
## 3 sponsor_typeInvestigator 0.49507774 0.2896269 1.7093637 1628.929
## 4 sponsor_typeNon industry -0.15187081 0.1376295 -1.1034756 1628.710
## 5 region_AfricaYes 0.66931008 0.2539976 2.6351035 1632.837
## 6 region_N_AmericaYes 0.86141832 0.1685819 5.1097903 1632.729
## 7 region_L_AmericaYes -0.20001925 0.1995483 -1.0023603 1632.837
## 8 region_AsiaYes 0.03818506 0.1505652 0.2536115 1632.829
## 9 region_EuropeYes 0.60893932 0.1564052 3.8933433 1632.800
## 10 region_OceaniaYes 1.27487627 0.3719383 3.4276553 1632.837
## p.value
## 1 2.736175e-01
## 2 1.966375e-02
## 3 8.757407e-02
## 4 2.699837e-01
## 5 8.490747e-03
## 6 3.604149e-07
## 7 3.163181e-01
## 8 7.998277e-01
## 9 1.028542e-04
## 10 6.239365e-04
# Pooled coefficient summaries (one data frame per outcome) of the
# region-adjusted indication-matched models.
sum_indication_sens_mice <- lapply(indication_sens_models_mice, function(x) summary(x$pooled_fit))

# Odds ratio, interval limits and p-value for the second coefficient row
# (covidTRUE in the summaries printed above). The interval is
# exp(estimate -/+ z * std.error), where `z` is the critical value defined
# earlier in the file (Bonferroni corrected, per the original comment).
# Columns are addressed by name rather than by position so that a change in
# the summary layout cannot silently pick the wrong column.
pool_OR_indication_sens_mice <- lapply(sum_indication_sens_mice, function(x) {
  log_or <- x$estimate[2]
  half_width <- z * x$std.error[2]
  # Unnamed 1 x 4 matrix: the stacked data frame keeps its default column
  # names, which the later bind_rows() of all OR tables relies on.
  matrix(
    c(exp(c(log_or, log_or - half_width, log_or + half_width)), x$p.value[2]),
    nrow = 1
  )
})
pool_OR_indication_sens_mice <- do.call(rbind.data.frame, pool_OR_indication_sens_mice)
pool_OR_indication_sens_mice$Analysis <- "IM total sensitivity (9)"
Analysis 10
Analysis without inferring outcomes.
Create datasets.
# Outcome-specific datasets for analysis 10: each keeps only the rows in which
# the named outcome is recorded (not NA). For randomisation, rows coded
# "Not applicable" are removed as well.
keep_recorded <- function(data, outcome, drop_values = character()) {
  recorded <- data[[outcome]]
  data[!is.na(recorded) & !(recorded %in% drop_values), ]
}

main_dataset_2_control <- keep_recorded(main_dataset_2, "control_arm")
main_dataset_2_randomisation <- keep_recorded(
  main_dataset_2, "randomisation",
  drop_values = "Not applicable"
)
main_dataset_2_blinding <- keep_recorded(main_dataset_2, "blinding")

indication_dataset_2_control <- keep_recorded(indication_dataset_2, "control_arm")
indication_dataset_2_randomisation <- keep_recorded(
  indication_dataset_2, "randomisation",
  drop_values = "Not applicable"
)
indication_dataset_2_blinding <- keep_recorded(indication_dataset_2, "blinding")
# Sanity check of the filtering: outcome counts in the unfiltered
# indication-matched data (first three tables) versus the outcome-specific
# subsets (last three tables). useNA = "a" is partially matched to "always",
# so the <NA> column is shown even when it is zero.
table(indication_dataset_2$control_arm, useNA = "a")
##
## No Yes <NA>
## 203 1453 1
table(indication_dataset_2$randomisation, useNA = "a")
##
## No Not applicable Yes <NA>
## 86 203 1362 6
table(indication_dataset_2$blinding, useNA = "a")
##
## No Yes <NA>
## 720 881 56
# After filtering: no NA left in any subset.
table(indication_dataset_2_control$control_arm, useNA = "a")
##
## No Yes <NA>
## 203 1453 0
# "Not applicable" remains as an (empty) factor level after filtering.
table(indication_dataset_2_randomisation$randomisation, useNA = "a")
##
## No Not applicable Yes <NA>
## 86 0 1362 0
table(indication_dataset_2_blinding$blinding, useNA = "a")
##
## No Yes <NA>
## 720 881 0
# nothing to do for prospective
Main direct adjustment with only non-inferred outcomes.
# Analysis 10, main comparison, direct-effect adjustment set: one logistic
# model per outcome, each fitted on the dataset restricted to rows where that
# outcome is recorded. Stored in the order control arm, randomisation,
# blinding.
main_direct_models_10 <- list()
main_direct_models_10[[1]] <- glm(
  as.formula(paste("control_arm", "~", paste(main_direct_adjustment, collapse = "+"))),
  family = binomial(link = "logit"),
  data = main_dataset_2_control)

# source registry needs to be grouped further because it created convergence
# issues
# The column is referenced through mutate()'s data mask rather than by
# reaching back into the data frame with `$`. The re-lumped dataset replaces
# main_dataset_2_randomisation, so the later total-effect randomisation model
# fitted on it sees the lumped factor too.
main_dataset_2_randomisation <- main_dataset_2_randomisation %>%
  mutate(source_registry = fct_lump_n(source_registry, n = 5))

main_direct_models_10[[2]] <- glm(
  as.formula(paste("randomisation", "~", paste(main_direct_adjustment, collapse = "+"))),
  family = binomial(link = "logit"),
  data = main_dataset_2_randomisation)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
main_direct_models_10[[3]] <- glm(
  as.formula(paste("blinding", "~", paste(main_direct_adjustment, collapse = "+"))),
  family = binomial(link = "logit"),
  data = main_dataset_2_blinding)

# Coefficient plot of all three models with a reference line at 1.
sjPlot::plot_models(main_direct_models_10, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

# Same plot with every term after the second coefficient of the first model
# removed (the first two are presumably the intercept and the covid term --
# confirm against main_direct_adjustment).
coef_names <- names(coef(main_direct_models_10[[1]]))
sjPlot::plot_models(
  main_direct_models_10,
  prefix.labels = "varname",
  # Negative indexing instead of 3:length(coef_names): the latter counts
  # backwards (3, 2) when the model has fewer than three coefficients.
  rm.terms = coef_names[-(1:2)]
) +
  theme_bw() +
  geom_hline(yintercept = 1)

Indication matched direct adjustment with only non-inferred outcomes.
# Analysis 10, indication-matched comparison, direct-effect adjustment set.
# The list holds one logistic model per outcome in the order control arm,
# randomisation, blinding; each is fitted on the dataset restricted to rows
# where that outcome is recorded.
indication_direct_models_10 <- list(
  glm(
    as.formula(paste("control_arm", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset_2_control
  ),
  glm(
    as.formula(paste("randomisation", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset_2_randomisation
  ),
  ## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
  glm(
    as.formula(paste("blinding", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset_2_blinding
  )
)

# Coefficient plot of all three models with a reference line at 1.
sjPlot::plot_models(indication_direct_models_10, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

# Same plot with every term after the second coefficient of the first model
# removed (the first two are presumably the intercept and the covid term --
# confirm against indication_direct_adjustment).
coef_names <- names(coef(indication_direct_models_10[[1]]))
sjPlot::plot_models(
  indication_direct_models_10,
  prefix.labels = "varname",
  # Negative indexing instead of 3:length(coef_names): the latter counts
  # backwards (3, 2) when the model has fewer than three coefficients.
  rm.terms = coef_names[-(1:2)]
) +
  theme_bw() +
  geom_hline(yintercept = 1)

Main total adjustment with only non-inferred outcomes.
# Analysis 10, main comparison, total-effect adjustment set. The list holds
# one logistic model per outcome in the order control arm, randomisation,
# blinding; each is fitted on the dataset restricted to rows where that
# outcome is recorded.
main_total_models_10 <- list(
  glm(
    as.formula(paste("control_arm", "~", paste(main_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = main_dataset_2_control
  ),
  glm(
    as.formula(paste("randomisation", "~", paste(main_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = main_dataset_2_randomisation
  ),
  glm(
    as.formula(paste("blinding", "~", paste(main_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = main_dataset_2_blinding
  )
)

# Coefficient plot of all three models with a reference line at 1.
sjPlot::plot_models(main_total_models_10, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

Indication matched total adjustment with only non-inferred outcomes.
# Analysis 10, indication-matched comparison, total-effect adjustment set.
# The list holds one logistic model per outcome in the order control arm,
# randomisation, blinding; each is fitted on the dataset restricted to rows
# where that outcome is recorded.
indication_total_models_10 <- list(
  glm(
    as.formula(paste("control_arm", "~", paste(indication_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset_2_control
  ),
  glm(
    as.formula(paste("randomisation", "~", paste(indication_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset_2_randomisation
  ),
  glm(
    as.formula(paste("blinding", "~", paste(indication_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset_2_blinding
  )
)

# Coefficient plot of all three models with a reference line at 1.
sjPlot::plot_models(indication_total_models_10, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

Overall results
# Stack the six odds-ratio tables (analyses 4-7 and the two analysis-9
# sensitivity runs) into one data frame. bind_rows() lines columns up by
# name, so the main-direct table first takes over the column names of the
# IM-direct table.
colnames(pool_OR_main_direct_mice) <- colnames(pool_OR_indication_direct_mice)
result <- bind_rows(
  pool_OR_main_direct_mice,
  pool_OR_indication_direct_mice,
  pool_OR_main_total_mice,
  pool_OR_indication_total_mice,
  pool_OR_main_sens_mice,
  pool_OR_indication_sens_mice
)
colnames(result) <- c("Estimate", "Lower.CI", "Upper.CI", "P-Value", "Analysis")

# Every table contributes its rows in the same outcome order, hence the
# four labels repeated once per table.
result <- result %>%
  mutate(
    Outcome = rep(
      c("Control arm", "Randomisation", "Blinding", "Prospective"),
      times = 6
    )
  ) %>%
  select(Analysis, Outcome, everything())
rownames(result) <- NULL

# Display copy: numeric columns rounded to 3 significant digits plus a
# "estimate [lower-upper]" text column built from the rounded values.
result_t <- result %>%
  mutate(
    across(c(Estimate, Lower.CI, Upper.CI, `P-Value`), ~ signif(.x, digits = 3)),
    x = paste0(Estimate, " [", Lower.CI, "-", Upper.CI, "]")
  )
result_t
# Forest-style plot of the stacked odds ratios: one facet per analysis, one
# row per outcome. The factor levels fix the facet order and the order of the
# outcomes along the (flipped) axis.
x <- result %>%
  mutate(
    Analysis = factor(
      Analysis,
      levels = c(
        "Main direct (4)",
        "IM direct (5)",
        "Main total (6)",
        "IM total (7)",
        "Main total sensitivity (9)",
        "IM total sensitivity (9)"
      )
    ),
    Outcome = factor(
      Outcome,
      levels = c("Prospective", "Blinding", "Randomisation", "Control arm")
    )
  )

# Point estimate with interval bars; dashed red line marks an odds ratio of 1.
p <- ggplot(x, aes(Outcome, Estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI)) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
  labs(y = "Odds ratio") +
  coord_flip()

p +
  facet_wrap(~Analysis, ncol = 1) +
  theme_light()

Analysis 10 summary results
# Collect the twelve analysis-10 models (4 analyses x 3 outcomes) and name
# each by its outcome.
all_models_10 <- setNames(
  c(
    main_direct_models_10, indication_direct_models_10,
    main_total_models_10, indication_total_models_10
  ),
  rep(c("control_arm", "randomisation", "blinding"), times = 4)
)

# Coefficient matrix of every model.
sum_10 <- lapply(all_models_10, function(model) summary(model)[["coefficients"]])

# Odds ratio, interval limits and p-value taken from the second coefficient
# row of each model; the interval is exp(estimate -/+ z * std. error), with
# `z` the critical value defined earlier in the file.
pool_OR_all_10 <- lapply(sum_10, function(coefs) {
  log_or <- coefs[2, 1]
  half_width <- z * coefs[2, 2]
  # Unnamed 1 x 4 matrix, so the stacked data frame starts with default names.
  matrix(
    c(exp(c(log_or, log_or - half_width, log_or + half_width)), coefs[2, 4]),
    nrow = 1
  )
})

result_10 <- do.call(rbind.data.frame, pool_OR_all_10)
colnames(result_10) <- c("Estimate", "Lower.CI", "Upper.CI", "P-Value")
result_10 <- result_10 %>%
  mutate(
    Outcome = rep(c("Control arm", "Randomisation", "Blinding"), times = 4),
    Analysis = rep(
      c("Main direct (4)", "IM direct (5)", "Main total (6)", "IM total (7)"),
      each = 3
    )
  ) %>%
  select(Analysis, Outcome, everything())
rownames(result_10) <- NULL

# Display copy: numeric columns rounded to 3 significant digits plus a
# "estimate [lower-upper]" text column built from the rounded values.
result_10_t <- result_10 %>%
  mutate(
    across(c(Estimate, Lower.CI, Upper.CI, `P-Value`), ~ signif(.x, digits = 3)),
    x = paste0(Estimate, " [", Lower.CI, "-", Upper.CI, "]")
  )
result_10_t
# Forest-style plot of the analysis-10 odds ratios: one facet per analysis,
# one row per outcome. The factor levels fix the facet order and the order of
# the outcomes along the (flipped) axis.
x <- result_10 %>%
  mutate(
    Analysis = factor(
      Analysis,
      levels = c(
        "Main direct (4)",
        "IM direct (5)",
        "Main total (6)",
        "IM total (7)"
      )
    ),
    Outcome = factor(
      Outcome,
      levels = c("Blinding", "Randomisation", "Control arm")
    )
  )

# Point estimate with interval bars; dashed red line marks an odds ratio of 1.
p <- ggplot(x, aes(Outcome, Estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI)) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
  labs(y = "Odds ratio") +
  coord_flip()

p +
  facet_wrap(~Analysis, ncol = 1) +
  theme_light()

Exploratory analysis
The code used to define the adjustment sets is given in a separate script.
• Control arm: sample size, sponsor type, region, phase, intervention type, and primary purpose.
• Randomisation: sample size, sponsor type, region, control arm, multicentre, vaccine (i.e. a particular intervention type), and primary purpose.
• Blinding: sample size, sponsor type, randomisation, region, control arm, phase, intervention type, and primary purpose.
• Prospective registration: sample size, sponsor type, region, phase, and conventional (i.e. a particular intervention type).
# Adjustment sets (right-hand-side terms) for the four exploratory models.
#
# Region indicators shared by all four sets. The original vectors listed
# "region_N_America" twice; the duplicate is dropped here, which leaves the
# fitted models unchanged because repeated formula terms are collapsed.
# NOTE(review): region_Asia and region_Europe are not part of any set. The
# duplicated entry suggests one of them may have been intended -- confirm
# against the script that defines the adjustment sets before adding them, as
# that would change the model results reported below.
exploratory_region_terms <- c(
  "region_Africa", "region_N_America",
  "region_L_America", "region_Oceania"
)

control_adjustment <- c(
  "covid",
  "sample_size", "sponsor_type",
  exploratory_region_terms,
  "phase_clean",
  "vaccine", "conventional", "traditional",
  "primary_purpose"
)

randomisation_adjustment <- c(
  "covid",
  "sample_size", "sponsor_type",
  exploratory_region_terms,
  # "control_arm", leads to issues with convergence
  "multicentre",
  "vaccine", "primary_purpose"
)

blinding_adjustment <- c(
  "covid",
  "sample_size", "sponsor_type",
  "randomisation",
  exploratory_region_terms,
  # "control_arm", leads to issues with convergence
  "phase_clean",
  "vaccine", "conventional", "traditional",
  "primary_purpose"
)

prospective_adjustment <- c(
  "covid",
  "sample_size", "sponsor_type",
  exploratory_region_terms,
  "phase_clean",
  "conventional"
)
Complete cases are used for the exploratory analysis.
# Exploratory dataset: covid and main arms only, columns study_arm through
# traditional. Rows are kept only if complete across ALL of those columns,
# including columns a given model does not use.
d1 <- d %>%
  filter(study_arm %in% c("covid", "main")) %>%
  select(study_arm:traditional)
d1 <- d1[complete.cases(d1), ]
# Exposure indicator as "Yes"/"No" text.
d1 <- d1 %>%
  mutate(covid = ifelse(study_arm == "covid", "Yes", "No"))

# Exploratory logistic model for control arm.
control_exp <- glm(
  reformulate(control_adjustment, response = "control_arm"),
  family = binomial(link = "logit"),
  data = d1
)
# The plot uses a 1 - 0.0125 confidence level; the table below is called with
# tab_model()'s defaults.
sjPlot::plot_model(control_exp, ci.lvl = 1 - 0.0125)

sjPlot::tab_model(control_exp)
|
|
Control arm
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.14
|
0.03 – 0.59
|
0.011
|
|
covidYes
|
2.18
|
1.60 – 2.97
|
<0.001
|
|
Sample size
|
2.13
|
1.83 – 2.49
|
<0.001
|
Sponsor type: Investigator
|
0.81
|
0.44 – 1.55
|
0.516
|
Sponsor type: Non industry
|
1.06
|
0.75 – 1.49
|
0.739
|
|
Africa: Yes
|
1.75
|
0.84 – 4.13
|
0.162
|
|
North America: Yes
|
0.58
|
0.41 – 0.81
|
0.001
|
|
Latin America: Yes
|
1.03
|
0.61 – 1.81
|
0.907
|
|
Oceania: Yes
|
0.65
|
0.35 – 1.23
|
0.172
|
|
Phase: Phase 2
|
0.72
|
0.47 – 1.10
|
0.131
|
|
Phase: Phase 3
|
1.66
|
0.97 – 2.85
|
0.065
|
|
Phase: Phase 4
|
1.02
|
0.55 – 1.91
|
0.958
|
|
Phase: Undefined
|
0.76
|
0.44 – 1.32
|
0.331
|
|
Vaccine: Yes
|
0.76
|
0.20 – 3.24
|
0.688
|
|
Conventional: Yes
|
2.11
|
0.63 – 8.94
|
0.261
|
|
Traditional: Yes
|
4.37
|
1.40 – 18.37
|
0.022
|
Primary purpose: Prevention
|
0.86
|
0.39 – 1.95
|
0.720
|
Primary purpose: Treatment
|
0.44
|
0.25 – 0.76
|
0.004
|
|
Observations
|
1539
|
|
R2 Tjur
|
0.188
|
# Exploratory logistic model for randomisation on the complete-case data d1.
randomisation_exp <- glm(
  reformulate(randomisation_adjustment, response = "randomisation"),
  family = binomial(link = "logit"),
  data = d1
)
# The plot uses a 1 - 0.0125 confidence level; the table below is called with
# tab_model()'s defaults.
sjPlot::plot_model(randomisation_exp, ci.lvl = 1 - 0.0125)

sjPlot::tab_model(randomisation_exp)
|
|
randomisation
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.12
|
0.06 – 0.24
|
<0.001
|
|
covidYes
|
1.81
|
1.38 – 2.39
|
<0.001
|
|
Sample size
|
2.46
|
2.14 – 2.86
|
<0.001
|
Sponsor type: Investigator
|
0.82
|
0.46 – 1.51
|
0.517
|
Sponsor type: Non industry
|
0.94
|
0.67 – 1.30
|
0.693
|
|
Africa: Yes
|
1.29
|
0.69 – 2.57
|
0.445
|
|
North America: Yes
|
0.46
|
0.33 – 0.63
|
<0.001
|
|
Latin America: Yes
|
1.50
|
0.91 – 2.57
|
0.121
|
|
Oceania: Yes
|
0.70
|
0.39 – 1.27
|
0.231
|
|
Multicentre: Yes
|
0.71
|
0.51 – 0.97
|
0.032
|
|
Vaccine: Yes
|
0.25
|
0.12 – 0.53
|
<0.001
|
Primary purpose: Prevention
|
0.78
|
0.39 – 1.55
|
0.473
|
Primary purpose: Treatment
|
0.62
|
0.38 – 1.00
|
0.057
|
|
Observations
|
1539
|
|
R2 Tjur
|
0.203
|
# Exploratory logistic model for blinding on the complete-case data d1.
blinding_exp <- glm(
  reformulate(blinding_adjustment, response = "blinding"),
  family = binomial(link = "logit"),
  data = d1
)
# The plot uses a 1 - 0.0125 confidence level; the table below is called with
# tab_model()'s defaults.
sjPlot::plot_model(blinding_exp, ci.lvl = 1 - 0.0125)

sjPlot::tab_model(blinding_exp)
|
|
Blinding
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.01
|
0.00 – 0.04
|
<0.001
|
|
covidYes
|
0.57
|
0.44 – 0.75
|
<0.001
|
|
Sample size
|
0.96
|
0.85 – 1.07
|
0.429
|
Sponsor type: Investigator
|
1.00
|
0.55 – 1.84
|
0.999
|
Sponsor type: Non industry
|
0.61
|
0.44 – 0.84
|
0.002
|
|
randomisation: Yes
|
219.09
|
89.48 – 728.42
|
<0.001
|
|
Africa: Yes
|
1.00
|
0.60 – 1.66
|
0.986
|
|
North America: Yes
|
1.52
|
1.09 – 2.14
|
0.014
|
|
Latin America: Yes
|
1.71
|
1.12 – 2.65
|
0.015
|
|
Oceania: Yes
|
0.86
|
0.47 – 1.58
|
0.608
|
|
Phase: Phase 2
|
1.44
|
0.91 – 2.27
|
0.116
|
|
Phase: Phase 3
|
1.27
|
0.80 – 2.01
|
0.313
|
|
Phase: Phase 4
|
0.75
|
0.43 – 1.28
|
0.290
|
|
Phase: Undefined
|
1.67
|
0.99 – 2.83
|
0.056
|
|
Vaccine: Yes
|
2.51
|
0.85 – 7.88
|
0.104
|
|
Conventional: Yes
|
1.03
|
0.49 – 2.23
|
0.931
|
|
Traditional: Yes
|
1.11
|
0.57 – 2.20
|
0.758
|
Primary purpose: Prevention
|
2.57
|
1.38 – 4.83
|
0.003
|
Primary purpose: Treatment
|
0.94
|
0.58 – 1.51
|
0.805
|
|
Observations
|
1539
|
|
R2 Tjur
|
0.326
|
# Exploratory logistic model for prospective registration on the
# complete-case data d1.
prospective_exp <- glm(
  reformulate(prospective_adjustment, response = "prospective"),
  family = binomial(link = "logit"),
  data = d1
)
# The plot uses a 1 - 0.0125 confidence level; the table below is called with
# tab_model()'s defaults.
sjPlot::plot_model(prospective_exp, ci.lvl = 1 - 0.0125)

sjPlot::tab_model(prospective_exp)
|
|
Prospective registration
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
1.94
|
1.04 – 3.67
|
0.039
|
|
covidYes
|
0.81
|
0.64 – 1.04
|
0.099
|
|
Sample size
|
1.16
|
1.05 – 1.28
|
0.003
|
Sponsor type: Investigator
|
1.51
|
0.83 – 2.91
|
0.192
|
Sponsor type: Non industry
|
0.58
|
0.43 – 0.78
|
<0.001
|
|
Africa: Yes
|
1.58
|
0.94 – 2.78
|
0.095
|
|
North America: Yes
|
1.87
|
1.36 – 2.57
|
<0.001
|
|
Latin America: Yes
|
0.67
|
0.45 – 1.01
|
0.054
|
|
Oceania: Yes
|
15.02
|
4.62 – 92.27
|
<0.001
|
|
Phase: Phase 2
|
1.62
|
1.09 – 2.38
|
0.016
|
|
Phase: Phase 3
|
0.99
|
0.65 – 1.48
|
0.945
|
|
Phase: Phase 4
|
1.35
|
0.82 – 2.22
|
0.239
|
|
Phase: Undefined
|
1.18
|
0.75 – 1.85
|
0.467
|
|
Conventional: Yes
|
0.64
|
0.44 – 0.92
|
0.018
|
|
Observations
|
1539
|
|
R2 Tjur
|
0.083
|
Outcome neutral criterion 4
The code below draws a random sample of 30 trials per arm for manual checking.
# Reproducible draw of 30 trials from each study arm, written out as one file
# of 90 rows for checking.
# Draws 30 rows of `data` whose study_arm equals `arm_label`. The label is
# injected with `!!` so that it cannot be confused with a column of the data.
draw_30_from_arm <- function(data, arm_label) {
  data %>%
    filter(study_arm == !!arm_label) %>%
    sample_n(30)
}

# Fixed seed; the three draws run in the order covid, main, im, so the random
# number stream is consumed in a fixed order.
set.seed(5)
cov_30 <- draw_30_from_arm(main_dataset, "covid")
main_30 <- draw_30_from_arm(main_dataset, "main")
im_30 <- draw_30_from_arm(indication_dataset, "im")

check_90 <- bind_rows(cov_30, main_30, im_30)
write_csv(check_90, file = "data/check/final_check.csv")